diff --git a/improver/calibration/__init__.py b/improver/calibration/__init__.py index 054a2d4b2b..fa6c58af99 100644 --- a/improver/calibration/__init__.py +++ b/improver/calibration/__init__.py @@ -6,6 +6,7 @@ and coefficient inputs. """ +import glob from collections import OrderedDict from pathlib import Path from typing import Dict, List, Optional, Tuple, Union @@ -19,6 +20,8 @@ get_diagnostic_cube_name_from_probability_name, ) from improver.utilities.cube_manipulation import MergeCubes +from improver.utilities.flatten import flatten +from improver.utilities.load import load_cubelist class CalibrationSchemas: @@ -268,71 +271,76 @@ def split_forecasts_and_bias_files(cubes: CubeList) -> Tuple[Cube, Optional[Cube return forecast_cube, bias_cubes -def split_pickle_parquet_and_netcdf( - files: List[Path], -) -> Tuple[Optional[CubeList], Optional[List[Path]], Optional[object]]: - """Split the input files into pickle, parquet, and netcdf files. - Any or all of NetCDF, Parquet, and pickle files can be loaded. Only a single - pickle file is expected, but multiple netCDF and parquet files can be provided. +def split_netcdf_parquet_pickle(files): + """Split the input files into netcdf, parquet, and pickle files. + Only a single pickle file is expected. Args: files: - A list of input file paths. + A list of input file paths which will be split into pickle, + parquet, and netcdf files. + Returns: - - A flattened cube list containing all the cubes loaded from NetCDF files, or None. - - A list of paths to Parquet files, or None. - - A loaded pickle file, or None. + - A flattened cube list containing all the cubes contained within the + provided paths to NetCDF files. + - A list of paths to Parquet files. + - A loaded pickle file. + Raises: - ValueError: If the path provided is not loadable as a pickle file, parquet file - or netcdf file. ValueError: If multiple pickle files provided, as only one is ever expected. """ - cubes = iris.cube.CubeList() - loaded_pickle = None + cubes = CubeList([]) + loaded_pickles = [] parquets = [] - for file_path in files: - if not file_path.exists(): - continue + for file in files: + file_paths = glob.glob(str(file)) + for file_path_str in file_paths: + file_path = Path(file_path_str) + if not file_path.exists(): + continue - # Directories indicate we are working with parquet files. - if file_path.is_dir(): - parquets.append(file_path) - continue + # Directories indicate we are working with parquet files. + if file_path.is_dir(): + parquets.append(file_path) + continue - try: - cube = iris.load(file_path) - cubes.extend(cube) - except ValueError: - if loaded_pickle is not None: - msg = "Multiple pickle inputs have been provided. Only one is expected." - raise ValueError(msg) try: - loaded_pickle = joblib.load(file_path) - except Exception as e: - msg = f"Failed to load {file_path}: {e}" - raise ValueError(msg) + cube = load_cubelist(str(file_path)) + cubes.extend(cube) + except ValueError: + try: + loaded_pickles.append(joblib.load(file_path)) + except Exception as e: + msg = f"Failed to load {file_path}: {e}" + raise ValueError(msg) + + if len(loaded_pickles) > 1: + msg = "Multiple pickle inputs have been provided. Only one is expected." 
+ raise ValueError(msg) return ( cubes if cubes else None, parquets if parquets else None, - loaded_pickle if loaded_pickle else None, + loaded_pickles[0] if loaded_pickles else None, ) -def identify_parquet_type( - parquet_paths: List[Path], -) -> Tuple[Optional[Path], Optional[Path]]: +def identify_parquet_type(parquet_paths: List[Path]): """Determine whether the provided parquet paths contain forecast or truth data. This is done by checking the columns within the parquet files for the presence of a forecast_period column which is only present for forecast data. + Args: parquet_paths: A list of paths to Parquet files. + + Returns: - The path to the Parquet file containing the historical forecasts. - The path to the Parquet file containing the truths. """ + # import here to avoid dependency on pyarrow for all of improver import pyarrow.parquet as pq forecast_table_path = None @@ -351,6 +359,134 @@ def identify_parquet_type( return forecast_table_path, truth_table_path +def split_cubes_for_samos( + cubes: CubeList, + gam_features: List[str], + truth_attribute: Optional[str] = None, + expect_emos_coeffs: bool = False, + expect_emos_fields: bool = False, +): + """Function to split the forecast, truth, gam additional predictors and emos + additional predictor cubes. + + Args: + cubes: + A list of input cubes which will be split into relevant groups. + gam_features: + A list of strings containing the names of the additional fields + required for the SAMOS GAMs. + truth_attribute: + An attribute and its value in the format of "attribute=value", + which must be present on truth cubes. If None, no truth cubes are + expected or returned. + expect_emos_coeffs: + If True, EMOS coefficient cubes are expected to be found in the input + cubes. If False, an error will be raised if any such cubes are found. + expect_emos_fields: + If True, additional EMOS fields are expected to be found in the input + cubes. If False, an error will be raised if any such cubes are found. + + Raises: + IOError: + If EMOS coefficients cubes are found when they are not expected. + IOError: + If additional fields cubes are found which do not match the features in + gam_features. + IOError: + If probability cubes are provided with more than one name. + + Returns: + - A cube containing all the historic forecasts, or None if no such cubes + were found. + - A cube containing all the truth data, or None if no such cubes were found + or no truth_attribute was provided. + - A cubelist containing all the additional fields required for the GAMs, + or None if no such cubes were found. + - A cubelist containing all the EMOS coefficient cubes, or None if no such + cubes were found. + - A cubelist containing all the additional fields required for EMOS, + or None if no such cubes were found. + - A cube containing a probability template, or None if no such cube is found. + """ + forecast = iris.cube.CubeList([]) + truth = iris.cube.CubeList([]) + gam_additional_fields = iris.cube.CubeList([]) + emos_coefficients = iris.cube.CubeList([]) + emos_additional_fields = iris.cube.CubeList([]) + prob_template = None + + # Prepare variables used to split forecast and truth. 
+ truth_key, truth_value = None, None + if truth_attribute: + truth_key, truth_value = truth_attribute.split("=") + + for cube in flatten(cubes): + if "time" in [c.name() for c in cube.coords()]: + if truth_key and cube.attributes.get(truth_key) == truth_value: + truth.append(cube.copy()) + else: + forecast.append(cube.copy()) + elif "emos_coefficient" in cube.name(): + emos_coefficients.append(cube.copy()) + elif cube.name() in gam_features: + gam_additional_fields.append(cube.copy()) + else: + emos_additional_fields.append(cube.copy()) + + # Check that all required inputs are present and no unexpected cubes have been + # found. + missing_inputs = [] + if len(forecast) == 0: + missing_inputs.append("forecast") + if truth_key and len(truth) == 0: + missing_inputs.append("truth") + if missing_inputs: + raise IOError(f"Missing {' and '.join(missing_inputs)} input.") + + if not expect_emos_coeffs and len(emos_coefficients) > 0: + msg = ( + f"Found EMOS coefficients cubes when they were not expected. The following " + f"such cubes were found: {[c.name() for c in emos_coefficients]}." + ) + raise IOError(msg) + + if not expect_emos_fields and len(emos_additional_fields) > 0: + msg = ( + f"Found additional fields cubes which do not match the features in " + f"gam_features. The following cubes were found: " + f"{[c.name() for c in emos_additional_fields]}." + ) + raise IOError(msg) + + # Split out prob_template cube if required. + forecast_names = [c.name() for c in forecast] + prob_forecast_names = [name for name in forecast_names if "probability" in name] + if len(set(prob_forecast_names)) > 1: + msg = ( + "Providing multiple probability cubes is not supported. A probability cube " + "can either be provided as the forecast or the probability template, but " + f"not both. Cubes provided: {prob_forecast_names}." + ) + raise IOError(msg) + else: + if len(set(forecast_names)) > 1: + prob_template = forecast.extract(prob_forecast_names[0])[0] + forecast.remove(prob_template) + + forecast = MergeCubes()(forecast) + if truth_key: + truth = MergeCubes()(truth) + + return ( + None if not forecast else forecast, + None if not truth else truth, + None if not gam_additional_fields else gam_additional_fields, + None if not emos_coefficients else emos_coefficients, + None if not emos_additional_fields else emos_additional_fields, + prob_template, + ) + + def validity_time_check(forecast: Cube, validity_times: List[str]) -> bool: """Check the validity time of the forecast matches the accepted validity times within the validity times list. diff --git a/improver/calibration/emos_calibration.py b/improver/calibration/emos_calibration.py index 78e204f9ee..59123e2e49 100644 --- a/improver/calibration/emos_calibration.py +++ b/improver/calibration/emos_calibration.py @@ -885,9 +885,10 @@ def _create_cubelist( cubelist is for a separate EMOS coefficient e.g. alpha, beta, gamma, delta. 
""" - (template_dims, spatial_associated_coords) = ( - self._get_spatial_associated_coordinates(historic_forecasts) - ) + ( + template_dims, + spatial_associated_coords, + ) = self._get_spatial_associated_coordinates(historic_forecasts) coords_to_replace = ( self._create_temporal_coordinates(historic_forecasts) + spatial_associated_coords @@ -1193,6 +1194,7 @@ def guess_and_minimise( forecast_predictor_data = np.ma.stack( broadcast_data_to_time_coord(forecast_predictors) ) + initial_guess = self.compute_initial_guess( truths.data, forecast_predictor_data, @@ -1743,181 +1745,6 @@ def _check_additional_field_sites(self, forecast, additional_fields): ) raise ValueError(msg) - @staticmethod - def _get_attribute( - coefficients: CubeList, attribute_name: str, optional: bool = False - ) -> Optional[Any]: - """Get the value for the requested attribute, ensuring that the - attribute is present consistently across the cubes within the - coefficients cubelist. - - Args: - coefficients: - EMOS coefficients - attribute_name: - Name of expected attribute - optional: - Indicate whether the attribute is allowed to be optional. - - Returns: - Returns None if the attribute is not present. Otherwise, - the value of the attribute is returned. - - Raises: - ValueError: If coefficients do not share the expected attributes. - """ - attributes = [ - str(c.attributes[attribute_name]) - for c in coefficients - if c.attributes.get(attribute_name) is not None - ] - - if not attributes and optional: - return None - if not attributes and not optional: - msg = ( - f"The {attribute_name} attribute must be specified on all " - "coefficients cubes." - ) - raise AttributeError(msg) - - if len(set(attributes)) == 1 and len(attributes) == len(coefficients): - return coefficients[0].attributes[attribute_name] - - msg = ( - "Coefficients must share the same {0} attribute. " - "{0} attributes provided: {1}".format(attribute_name, attributes) - ) - raise AttributeError(msg) - - def _get_input_forecast_type(self, forecast: Cube): - """Identifies whether the forecast is in probability, realization - or percentile space. - - Args: - forecast - - """ - try: - find_percentile_coordinate(forecast) - except CoordinateNotFoundError: - if forecast.name().startswith("probability_of"): - forecast_type = "probabilities" - else: - forecast_type = "realizations" - else: - forecast_type = "percentiles" - self.input_forecast_type = forecast_type - - def _convert_to_realizations( - self, forecast: Cube, realizations_count: Optional[int], ignore_ecc_bounds: bool - ) -> Cube: - """Convert an input forecast of probabilities or percentiles into - pseudo-realizations. 
-
-        Args:
-            forecast
-            realizations_count:
-                Number of pseudo-realizations to generate from the input
-                forecast
-            ignore_ecc_bounds
-
-        Returns:
-            Cube with pseudo-realizations
-        """
-        if not realizations_count:
-            raise ValueError(
-                "The 'realizations_count' argument must be defined "
-                "for forecasts provided as {}".format(self.input_forecast_type)
-            )
-
-        if self.input_forecast_type == "probabilities":
-            conversion_plugin = ConvertProbabilitiesToPercentiles(
-                ecc_bounds_warning=ignore_ecc_bounds
-            )
-        if self.input_forecast_type == "percentiles":
-            conversion_plugin = ResamplePercentiles(
-                ecc_bounds_warning=ignore_ecc_bounds
-            )
-
-        forecast_as_percentiles = conversion_plugin(
-            forecast, no_of_percentiles=realizations_count
-        )
-        forecast_as_realizations = RebadgePercentilesAsRealizations()(
-            forecast_as_percentiles
-        )
-
-        return forecast_as_realizations
-
-    def _format_forecast(
-        self, template: Cube, randomise: bool, random_seed: Optional[int]
-    ) -> Cube:
-        """
-        Generate calibrated probability, percentile or realization output in
-        the desired format.
-
-        Args:
-            template:
-                A template cube containing the coordinates and metadata expected
-                on the calibrated forecast.
-            randomise:
-                If True, order realization output randomly rather than using
-                the input forecast. If forecast type is not realizations, this
-                is ignored.
-            random_seed:
-                For realizations input if randomise is True, random seed for
-                generating re-ordered percentiles. If randomise is False, the
-                random seed may still be used for splitting ties.
-
-        Returns:
-            Calibrated forecast
-        """
-        if self.output_forecast_type == "probabilities":
-            conversion_plugin = ConvertLocationAndScaleParametersToProbabilities(
-                distribution=self.distribution["name"],
-                shape_parameters=self.distribution["shape"],
-            )
-            result = conversion_plugin(
-                self.distribution["location"], self.distribution["scale"], template
-            )
-
-        else:
-            conversion_plugin = ConvertLocationAndScaleParametersToPercentiles(
-                distribution=self.distribution["name"],
-                shape_parameters=self.distribution["shape"],
-            )
-
-            if self.output_forecast_type == "percentiles":
-                perc_coord = find_percentile_coordinate(template)
-                result = conversion_plugin(
-                    self.distribution["location"],
-                    self.distribution["scale"],
-                    template,
-                    percentiles=(
-                        self.percentiles if self.percentiles else perc_coord.points
-                    ),
-                )
-            else:
-                no_of_percentiles = len(template.coord("realization").points)
-                percentiles = conversion_plugin(
-                    self.distribution["location"],
-                    self.distribution["scale"],
-                    template,
-                    no_of_percentiles=no_of_percentiles,
-                )
-                result = EnsembleReordering().process(
-                    percentiles,
-                    template,
-                    random_ordering=randomise,
-                    random_seed=random_seed,
-                )
-
-        # Preserve cell methods from template.
-        for cm in template.cell_methods:
-            result.add_cell_method(cm)
-
-        return result
-
     def process(
         self,
         forecast: Cube,
@@ -1931,7 +1758,8 @@ def process(
         predictor: str = "mean",
         randomise: bool = False,
         random_seed: Optional[int] = None,
-    ) -> Cube:
+        return_parameters: bool = False,
+    ) -> Union[Cube, CubeList]:
         """Calibrate input forecast using pre-calculated coefficients
 
         Args:
@@ -1969,12 +1797,18 @@ def process(
             random_seed:
                 Used in generating calibrated realizations. If input forecast
                 is probabilities or percentiles, this is ignored.
+            return_parameters:
+                If True, return location and scale parameters of the calibrated
+                forecast distribution, rather than forecasts. This option
+                supersedes all other inputs which affect the format of the output.
Returns: Calibrated forecast in the form of the input (ie probabilities - percentiles or realizations) + percentiles or realizations), or a tuple containing cubes of location and + scale parameters of the calibrated forecast distribution if + return_parameters is True. """ - self._get_input_forecast_type(forecast) + self.input_forecast_type = get_forecast_type(forecast) self.output_forecast_type = ( "probabilities" if prob_template else self.input_forecast_type ) @@ -1992,7 +1826,7 @@ def process( forecast_as_realizations = forecast.copy() if self.input_forecast_type != "realizations": - forecast_as_realizations = self._convert_to_realizations( + forecast_as_realizations = convert_to_realizations( forecast.copy(), realizations_count, ignore_ecc_bounds ) @@ -2007,20 +1841,222 @@ def process( tolerate_time_mismatch=tolerate_time_mismatch, ) - self.distribution = { - "name": self._get_attribute(coefficients, "distribution"), - "location": location_parameter, - "scale": scale_parameter, - "shape": self._get_attribute( - coefficients, "shape_parameters", optional=True - ), - } + if return_parameters: + return location_parameter, scale_parameter + else: + self.distribution = { + "name": get_attribute_from_coefficients(coefficients, "distribution"), + "location": location_parameter, + "scale": scale_parameter, + "shape": get_attribute_from_coefficients( + coefficients, "shape_parameters", optional=True + ), + } - template = prob_template if prob_template else forecast - result = self._format_forecast(template, randomise, random_seed) + template = prob_template if prob_template else forecast + result = generate_forecast_from_distribution( + self.distribution, template, self.percentiles, randomise, random_seed + ) - if land_sea_mask: - # fill in masked sea points with uncalibrated data - merge_land_and_sea(result, forecast) + if land_sea_mask: + # fill in masked sea points with uncalibrated data + merge_land_and_sea(result, forecast) - return result + return result + + +def get_forecast_type(forecast: Cube) -> str: + """Identifies whether the forecast is in probability, realization + or percentile space. + + Args: + forecast + + Returns: + forecast_type: str + One of "probabilities", "realizations" or "percentiles" + """ + try: + find_percentile_coordinate(forecast) + except CoordinateNotFoundError: + if forecast.name().startswith("probability_of"): + forecast_type = "probabilities" + else: + forecast_type = "realizations" + else: + forecast_type = "percentiles" + + return forecast_type + + +def convert_to_realizations( + forecast: Cube, realizations_count: Optional[int], ignore_ecc_bounds: bool +) -> Cube: + """Convert an input forecast of probabilities or percentiles into + pseudo-realizations. + + Args: + forecast + realizations_count: + Number of pseudo-realizations to generate from the input + forecast + ignore_ecc_bounds: + If True, allow percentiles from probabilities to exceed the ECC bounds + range. If input is not probabilities, this is ignored. + + Returns: + Cube with pseudo-realizations. 
+ """ + input_forecast_type = get_forecast_type(forecast) + if not realizations_count: + raise ValueError( + "The 'realizations_count' argument must be defined " + f"for forecasts provided as {input_forecast_type}" + ) + + if input_forecast_type == "probabilities": + conversion_plugin = ConvertProbabilitiesToPercentiles( + ecc_bounds_warning=ignore_ecc_bounds + ) + if input_forecast_type == "percentiles": + conversion_plugin = ResamplePercentiles(ecc_bounds_warning=ignore_ecc_bounds) + + forecast_as_percentiles = conversion_plugin( + forecast, no_of_percentiles=realizations_count + ) + forecast_as_realizations = RebadgePercentilesAsRealizations()( + forecast_as_percentiles + ) + + return forecast_as_realizations + + +def get_attribute_from_coefficients( + coefficients: CubeList, attribute_name: str, optional: bool = False +) -> Optional[Any]: + """Get the value for the requested attribute, ensuring that the + attribute is present consistently across the cubes within the + coefficients cubelist. + + Args: + coefficients: + EMOS coefficients + attribute_name: + Name of expected attribute + optional: + Indicate whether the attribute is allowed to be optional. + + Returns: + Returns None if the attribute is not present. Otherwise, + the value of the attribute is returned. + + Raises: + AttributeError: If the expected attribute is not on all coefficients cubes. + AttributeError: If the expected attribute is not the same across all + coefficients cubes. + """ + attributes = [ + str(c.attributes[attribute_name]) + for c in coefficients + if c.attributes.get(attribute_name) is not None + ] + + if not attributes and optional: + return None + if not attributes and not optional: + msg = ( + f"The {attribute_name} attribute must be specified on all " + "coefficients cubes." + ) + raise AttributeError(msg) + + if len(set(attributes)) == 1 and len(attributes) == len(coefficients): + return coefficients[0].attributes[attribute_name] + + msg = ( + "Coefficients must share the same {0} attribute. " + "{0} attributes provided: {1}".format(attribute_name, attributes) + ) + raise AttributeError(msg) + + +def generate_forecast_from_distribution( + distribution: Dict, + template: Cube, + percentiles: Optional[Sequence], + randomise: bool, + random_seed: Optional[int], +) -> Cube: + """ + Generate calibrated probability, percentile or realization output in + the desired format. + + Args: + distribution: + A dictionary with the following key-value pairs: + - "name": the name of the distribution + - "location": a cube of location parameters + - "scale": a cube of scale parameters + - "shape": an optional array of shape parameters + template: + A template cube containing the coordinates and metadata expected + on the calibrated forecast. + percentiles: + The set of percentiles used to create the calibrated forecast if the + template cube contains percentile data. + randomise: + If True, order realization output randomly rather than using + the input forecast. If forecast type is not realizations, this + is ignored. + random_seed: + For realizations input if randomise is True, random seed for + generating re-ordered percentiles. If randomise is False, the + random seed may still be used for splitting ties. 
+ + Returns: + Calibrated forecast + """ + output_forecast_type = get_forecast_type(template) + if output_forecast_type == "probabilities": + conversion_plugin = ConvertLocationAndScaleParametersToProbabilities( + distribution=distribution["name"], + shape_parameters=distribution["shape"], + ) + result = conversion_plugin( + distribution["location"], distribution["scale"], template + ) + + else: + conversion_plugin = ConvertLocationAndScaleParametersToPercentiles( + distribution=distribution["name"], + shape_parameters=distribution["shape"], + ) + + if output_forecast_type == "percentiles": + perc_coord = find_percentile_coordinate(template) + result = conversion_plugin( + distribution["location"], + distribution["scale"], + template, + percentiles=(percentiles if percentiles else perc_coord.points), + ) + else: + no_of_percentiles = len(template.coord("realization").points) + percentiles = conversion_plugin( + distribution["location"], + distribution["scale"], + template, + no_of_percentiles=no_of_percentiles, + ) + result = EnsembleReordering().process( + percentiles, + template, + random_ordering=randomise, + random_seed=random_seed, + ) + + # Preserve cell methods from template. + for cm in template.cell_methods: + result.add_cell_method(cm) + + return result diff --git a/improver/calibration/load_and_train_quantile_regression_random_forest.py b/improver/calibration/load_and_train_quantile_regression_random_forest.py index bc342af45f..5a8d9d558c 100644 --- a/improver/calibration/load_and_train_quantile_regression_random_forest.py +++ b/improver/calibration/load_and_train_quantile_regression_random_forest.py @@ -20,7 +20,7 @@ CalibrationSchemas, get_training_period_cycles, identify_parquet_type, - split_pickle_parquet_and_netcdf, + split_netcdf_parquet_pickle, ) from improver.calibration.quantile_regression_random_forest import ( TrainQuantileRegressionRandomForests, @@ -290,7 +290,7 @@ def process( """ if not self.quantile_forest_installed: return None, None, None - cube_inputs, parquets, _ = split_pickle_parquet_and_netcdf(file_paths) + cube_inputs, parquets, _ = split_netcdf_parquet_pickle(file_paths) # If there are no parquet files, return None. if not parquets: diff --git a/improver/calibration/samos_calibration.py b/improver/calibration/samos_calibration.py index 7535aa0560..4d7e188780 100644 --- a/improver/calibration/samos_calibration.py +++ b/improver/calibration/samos_calibration.py @@ -7,7 +7,7 @@ Statistics (SAMOS). 
""" -from typing import Dict, List, Optional, Tuple +from typing import Dict, List, Optional, Sequence, Tuple import iris import iris.pandas @@ -25,14 +25,21 @@ def GAM(self): from iris.analysis import MEAN, STD_DEV from iris.cube import Cube, CubeList from iris.util import new_axis -from numpy import array, int64, nan +from numpy import array, clip, float32, int64, nan from numpy.ma import masked_all_like from pandas import merge -from improver import BasePlugin +from improver import BasePlugin, PostProcessingPlugin from improver.calibration.emos_calibration import ( + ApplyEMOS, EstimateCoefficientsForEnsembleCalibration, + convert_to_realizations, + generate_forecast_from_distribution, + get_attribute_from_coefficients, + get_forecast_type, ) +from improver.ensemble_copula_coupling.utilities import get_bounds_of_distribution +from improver.metadata.probabilistic import find_threshold_coordinate from improver.utilities.cube_manipulation import collapse_realizations from improver.utilities.generalized_additive_models import GAMFit, GAMPredict from improver.utilities.mathematical_operations import CalculateClimateAnomalies @@ -44,6 +51,7 @@ def GAM(self): def prepare_data_for_gam( input_cube: Cube, additional_fields: Optional[CubeList] = None, + unique_site_id_key: Optional[str] = None, ) -> pd.DataFrame: """ Convert input cubes in to a single, combined dataframe. @@ -56,9 +64,15 @@ def prepare_data_for_gam( additional_fields cubes. Args: - input_cube: A cube of forecast or observation data. - additional_fields: Additional cubes with points which can be matched with points - in input_cube by matching spatial coordinate values. + input_cube: + A cube of forecast or observation data. + additional_fields: + Additional cubes with points which can be matched with points + in input_cube by matching spatial coordinate values. + unique_site_id_key: + If working with spot data and available, the name of the coordinate + in the input cubes that contains unique site IDs, e.g. "wmo_id" if + all sites have a valid wmo_id. Returns: A pandas dataframe with rows equal to the number of sites/grid points in @@ -77,11 +91,25 @@ def prepare_data_for_gam( add_ancillary_variables=True, ) df.reset_index(inplace=True) + if additional_fields: - spatial_coords = [ - input_cube.coord(axis="X").name(), - input_cube.coord(axis="Y").name(), - ] + # Check if we are dealing with spot data. + site_data = "spot_index" in [c.name() for c in input_cube.coords()] + + # For site data we should use unique IDs wherever possible. As a + # fallback we can match on latitude, longitude and altitude. We + # need altitude to accommodate e.g. sites with a lower and upper + # forecast altitude. If we are working with gridded data we use the + # spatial coordinates of the input cube. 
+ if site_data and unique_site_id_key is not None: + match_coords = [unique_site_id_key] + elif site_data: + match_coords = ["latitude", "longitude", "altitude"] + else: + match_coords = [ + input_cube.coord(axis="X").name(), + input_cube.coord(axis="Y").name(), + ] for cube in additional_fields: new_df = iris.pandas.as_data_frame( cube, @@ -90,9 +118,7 @@ def prepare_data_for_gam( add_ancillary_variables=True, ) new_df.reset_index(inplace=True) - match_coords = spatial_coords.copy() - match_coords.append(cube.name()) - df = merge(left=df, right=new_df[match_coords], how="left") + df = merge(left=df, right=new_df[match_coords + [cube.name()]], how="left") return df @@ -130,6 +156,63 @@ def convert_dataframe_to_cube( return result +def get_climatological_stats( + input_cube: Cube, + gams: List, + gam_features: List[str], + additional_cubes: Optional[CubeList], + sd_clip: float = 0.25, + unique_site_id_key: Optional[str] = None, +) -> Tuple[Cube, Cube]: + """Function to predict climatological means and standard deviations given fitted + GAMs for each statistic and cubes which can be used to construct a dataframe + containing all required features for those GAMs. + + Args: + input_cube + gams: A list containing two fitted GAMs, the first for predicting the + climatological mean of the locations in input_cube and the second + predicting the climatological standard deviation. + gam_features: + The list of features. These must be either coordinates on input_cube or + share a name with a cube in additional_cubes. The index of each + feature should match the indices used in model_specification. + additional_cubes: + Additional fields to use as supplementary predictors. + sd_clip: + The minimum standard deviation value to allow when predicting from the GAM. + Any predictions below this value will be set to this value. + unique_site_id_key: + If working with spot data and available, the name of the coordinate + in the input cubes that contains unique site IDs, e.g. "wmo_id" if + all sites have a valid wmo_id. + + Returns: + A pair of cubes containing climatological mean and climatological standard + deviation predictions respectively. + """ + diagnostic = input_cube.name() + + df = prepare_data_for_gam( + input_cube, additional_cubes, unique_site_id_key=unique_site_id_key + ) + + # Calculate climatological means and standard deviations using previously + # fitted GAMs. + mean_pred = GAMPredict().process(gams[0], df[gam_features]) + sd_pred = GAMPredict().process(gams[1], df[gam_features]) + + # Convert means and standard deviations into cubes + df[diagnostic] = mean_pred + mean_cube = convert_dataframe_to_cube(df, input_cube) + + df[diagnostic] = sd_pred + sd_cube = convert_dataframe_to_cube(df, input_cube) + sd_cube.data = clip(sd_cube.data, a_min=sd_clip, a_max=None) + + return mean_cube, sd_cube + + class TrainGAMsForSAMOS(BasePlugin): """ Class for fitting Generalised Additive Models (GAMs) to training data for use in @@ -150,6 +233,7 @@ def __init__( link: str = "identity", fit_intercept: bool = True, window_length: int = 11, + unique_site_id_key: Optional[str] = None, ): """ Initialize the class. @@ -157,29 +241,33 @@ def __init__( Args: model_specification: A list of lists which each contain three items (in order): - 1. a string containing a single pyGAM term; one of 'linear', - 'spline', 'tensor', or 'factor' - 2. a list of integers which correspond to the features to be - included in that term - 3. a dictionary of kwargs to be included when defining the term + 1. 
a string containing a single pyGAM term; one of 'linear', + 'spline', 'tensor', or 'factor'. + 2. a list of integers which correspond to the features to be + included in that term. + 3. a dictionary of kwargs to be included when defining the term. max_iter: A pyGAM argument which determines the maximum iterations allowed when - fitting the GAM + fitting the GAM. tol: A pyGAM argument determining the tolerance used to define the stopping - criteria + criteria. distribution: - A pyGAM argument determining the distribution to be used in the model + A pyGAM argument determining the distribution to be used in the model. link: - A pyGAM argument determining the link function to be used in the model + A pyGAM argument determining the link function to be used in the model. fit_intercept: A pyGAM argument determining whether to include an intercept term in - the model + the model. window_length: The length of the rolling window used to calculate the mean and standard deviation of the input cube when the input cube does not have a realization dimension coordinate. This must be an odd integer greater than 1. + unique_site_id_key: + An optional key to use for uniquely identifying each site in the + training data. If not provided, the default behavior is to use the + spatial coordinates (latitude, longitude) of each site. """ self.model_specification = model_specification self.max_iter = max_iter @@ -187,6 +275,7 @@ def __init__( self.distribution = distribution self.link = link self.fit_intercept = fit_intercept + self.unique_site_id_key = unique_site_id_key if window_length < 3 or window_length % 2 == 0 or window_length % 1 != 0: raise ValueError( @@ -201,6 +290,7 @@ def apply_aggregator( ) -> Cube: """ Internal function to apply rolling window aggregator to padded cube. + Args: padded_cube: The cube to have rolling window calculation applied to. @@ -448,11 +538,11 @@ def process( ) for stat_cube in stat_cubes: - df = prepare_data_for_gam(stat_cube, additional_fields) - + df = prepare_data_for_gam( + stat_cube, additional_fields, unique_site_id_key=self.unique_site_id_key + ) feature_values = df[features].values targets = df[input_cube.name()].values - output.append(plugin.process(feature_values, targets)) return output @@ -478,6 +568,7 @@ def __init__( self, distribution: str, emos_kwargs: Optional[Dict] = None, + unique_site_id_key: Optional[str] = None, ) -> None: """Initialize the class. @@ -488,56 +579,14 @@ def __init__( emos_kwargs: Keyword arguments accepted by the EstimateCoefficientsForEnsembleCalibration plugin. Should not contain a distribution argument. + unique_site_id_key: + If working with spot data and available, the name of the coordinate + in the input cubes that contains unique site IDs, e.g. "wmo_id" if + all sites have a valid wmo_id. """ self.distribution = distribution self.emos_kwargs = emos_kwargs if emos_kwargs else {} - - @staticmethod - def get_climatological_stats( - input_cube: Cube, - gams: List[pygam.GAM], - gam_features: List[str], - additional_fields: Optional[CubeList], - ) -> Tuple[Cube, Cube]: - """Function to predict climatological means and standard deviations given fitted - GAMs for each statistic and cubes which can be used to construct a dataframe - containing all required features for those GAMs. - - Args: - input_cube: - A cube of forecasts or observations from the training dataset. 
- gams: - A list containing two fitted GAMs, the first for predicting the - climatological mean of the locations in input_cube and the second - predicting the climatological standard deviation. - gam_features: - The list of features. These must be either coordinates on input_cube or - share a name with a cube in additional_fields. The index of each - feature should match the indices used in model_specification. - additional_fields: - Additional fields to use as supplementary predictors. - - Returns: - A pair of cubes containing climatological mean and climatological standard - deviation predictions respectively. - """ - diagnostic = input_cube.name() - - df = prepare_data_for_gam(input_cube, additional_fields) - - # Calculate climatological means and standard deviations using previously - # fitted GAMs. - mean_pred = GAMPredict().process(gams[0], df[gam_features]) - sd_pred = GAMPredict().process(gams[1], df[gam_features]) - - # Convert means and standard deviations into cubes - df[diagnostic] = mean_pred - mean_cube = convert_dataframe_to_cube(df, input_cube) - - df[diagnostic] = sd_pred - sd_cube = convert_dataframe_to_cube(df, input_cube) - - return mean_cube, sd_cube + self.unique_site_id_key = unique_site_id_key def climate_anomaly_emos( self, @@ -566,7 +615,6 @@ def climate_anomaly_emos( land points are used to calculate the coefficients. Within the land-sea mask cube land points should be specified as ones, and sea points as zeros. - Returns: CubeList constructed using the coefficients provided and using metadata from the historic_forecasts cube. Each cube within the @@ -634,18 +682,25 @@ def process( land points are used to calculate the EMOS coefficients. Within the land-sea mask cube land points should be specified as ones, and sea points as zeros. - Returns: CubeList constructed using the coefficients provided and using metadata from the historic_forecasts cube. Each cube within the cubelist is for a separate EMOS coefficient e.g. alpha, beta, gamma, delta. """ - forecast_mean, forecast_sd = self.get_climatological_stats( - historic_forecasts, forecast_gams, gam_features, gam_additional_fields + forecast_mean, forecast_sd = get_climatological_stats( + historic_forecasts, + forecast_gams, + gam_features, + gam_additional_fields, + unique_site_id_key=self.unique_site_id_key, ) - truth_mean, truth_sd = self.get_climatological_stats( - truths, truth_gams, gam_features, gam_additional_fields + truth_mean, truth_sd = get_climatological_stats( + truths, + truth_gams, + gam_features, + gam_additional_fields, + unique_site_id_key=self.unique_site_id_key, ) emos_coefficients = self.climate_anomaly_emos( @@ -656,3 +711,252 @@ def process( ) return emos_coefficients + + +class ApplySAMOS(PostProcessingPlugin): + """ + Class to calibrate an input forecast using SAMOS given the following inputs: + - Two GAMs which model, respectively, the climatological mean and standard + deviation of the forecast. This allows the forecast to be converted to + climatological anomalies. + - A set of EMOS coefficients which can be applied to correct the climatological + anomalies. + """ + + def __init__( + self, + percentiles: Optional[Sequence] = None, + unique_site_id_key: Optional[str] = None, + ): + """Initialize class. + + Args: + percentiles: + The set of percentiles used to create the calibrated forecast. + unique_site_id_key: + If working with spot data and available, the name of the coordinate + in the input cubes that contains unique site IDs, e.g. "wmo_id" if + all sites have a valid wmo_id. 
+ """ + self.percentiles = [float32(p) for p in percentiles] if percentiles else None + self.unique_site_id_key = unique_site_id_key + + def transform_anomalies_to_original_units( + self, + location_parameter: Cube, + scale_parameter: Cube, + truth_mean: Cube, + truth_sd: Cube, + forecast: Cube, + input_forecast_type: str, + ) -> None: + """Function to transform location and scale parameters which describe a + climatological anomaly distribution to location and scale parameters which + describe a distribution in the units of the original forecast. Both parameter + cubes are modified in place. + + Predictions of mean and standard deviation from the 'truth' GAMs are used for + this transformation. This ensures that the calibrated forecast follows the + 'true' distribution, rather than the distribution of the original forecast, + following the suggested method in: + + Dabernig, M., Mayr, G.J., Messner, J.W. and Zeileis, A. (2017). + Spatial ensemble post-processing with standardized anomalies. + Q.J.R. Meteorol. Soc, 143: 909-916. https://doi.org/10.1002/qj.2975 + + Args: + location_parameter: + Cube containing the location parameter of the climatological anomaly + distribution. This is modified in place. + scale_parameter: + Cube containing the scale parameter of the climatological anomaly + distribution. This is modified in place. + truth_mean: + Cube containing climatological mean predictions of the truths. + truth_sd: + Cube containing climatological standard deviation predictions of the + truths. + forecast: + The original, uncalibrated forecast. + input_forecast_type: + The type of the original, uncalibrated forecast. One of 'realizations', + 'percentiles' or 'probabilities'. + """ + + # The data in these cubes are identical along the realization dimensions. + truth_mean = next(truth_mean.slices_over("realization")) + truth_sd = next(truth_sd.slices_over("realization")) + + # Transform location and scale parameters so that they represent a distribution + # in the units of the original forecast, rather than climatological anomalies. + forecast_units = ( + forecast.units + if input_forecast_type != "probabilities" + else find_threshold_coordinate(forecast).units + ) + location_parameter.data = ( + location_parameter.data * truth_sd.data + ) + truth_mean.data + location_parameter.units = forecast_units + + # The scale parameter returned by ApplyEMOS is the standard deviation for a + # normal distribution. To get the desired standard deviation in + # realization/percentile space we must multiply by the estimated forecast + # standard deviation. + scale_parameter.data = scale_parameter.data * truth_sd.data + scale_parameter.units = forecast_units + + def process( + self, + forecast: Cube, + forecast_gams: List, + truth_gams: List, + gam_features: List[str], + emos_coefficients: CubeList, + gam_additional_fields: Optional[CubeList] = None, + emos_additional_fields: Optional[CubeList] = None, + prob_template: Optional[Cube] = None, + realizations_count: Optional[int] = None, + ignore_ecc_bounds: bool = True, + tolerate_time_mismatch: bool = False, + predictor: str = "mean", + randomise: bool = False, + random_seed: Optional[int] = None, + ): + """Calibrate input forecast using GAMs to convert the forecast to climatological + anomalies and pre-calculated EMOS coefficients to apply to those anomalies. + + Args: + forecast: + Uncalibrated forecast as probabilities, percentiles or + realizations. 
+ forecast_gams: + A list containing two fitted GAMs, the first for predicting the + climatological mean of the historic forecasts at each location and the + second predicting the climatological standard deviation. + truth_gams: + A list containing two fitted GAMs, the first for predicting the + climatological mean of the truths at each location and the + second predicting the climatological standard deviation. + gam_features: + The list of features. These must be either coordinates on input_cube or + share a name with a cube in additional_cubes. The index of each + feature should match the indices used in model_specification. + emos_coefficients: + EMOS coefficients. + gam_additional_fields: + Additional fields to use as supplementary predictors in the GAMs. + emos_additional_fields: + Additional fields to use as supplementary predictors in EMOS. + prob_template: + A cube containing a probability forecast that will be used as + a template when generating probability output when the input + format of the forecast cube is not probabilities i.e. realizations + or percentiles. + realizations_count: + Number of realizations to use when generating the intermediate + calibrated forecast from probability or percentile inputs + ignore_ecc_bounds: + If True, allow percentiles from probabilities to exceed the ECC + bounds range. If input is not probabilities, this is ignored. + tolerate_time_mismatch: + If True, tolerate a mismatch in validity time and forecast + period for coefficients vs forecasts. Use with caution! + predictor: + Predictor to be used to calculate the location parameter of the + calibrated distribution. Value is "mean" or "realizations". + randomise: + Used in generating calibrated realizations. If input forecast + is probabilities or percentiles, this is ignored. + random_seed: + Used in generating calibrated realizations. If input forecast + is probabilities or percentiles, this is ignored. + + Returns: + Calibrated forecast in the form of the input (ie probabilities + percentiles or realizations). + """ + input_forecast_type = get_forecast_type(forecast) + + forecast_as_realizations = forecast.copy() + if input_forecast_type != "realizations": + forecast_as_realizations = convert_to_realizations( + forecast.copy(), realizations_count, ignore_ecc_bounds + ) + forecast_mean, forecast_sd = get_climatological_stats( + forecast_as_realizations, + forecast_gams, + gam_features, + gam_additional_fields, + unique_site_id_key=self.unique_site_id_key, + ) + forecast_ca = CalculateClimateAnomalies(ignore_temporal_mismatch=True).process( + diagnostic_cube=forecast_as_realizations, + mean_cube=forecast_mean, + std_cube=forecast_sd, + ) + + # Returns parameters which describe a climate anomaly distribution. + location_parameter, scale_parameter = ApplyEMOS( + percentiles=self.percentiles + ).process( + forecast=forecast_ca, + coefficients=emos_coefficients, + additional_fields=emos_additional_fields, + prob_template=prob_template, + realizations_count=realizations_count, + ignore_ecc_bounds=ignore_ecc_bounds, + tolerate_time_mismatch=tolerate_time_mismatch, + predictor=predictor, + randomise=randomise, + random_seed=random_seed, + return_parameters=True, + ) + + truth_mean, truth_sd = get_climatological_stats( + forecast_as_realizations, + truth_gams, + gam_features, + gam_additional_fields, + unique_site_id_key=self.unique_site_id_key, + ) + + # Transform location and scale parameters to be in the same units as the + # original forecast. 
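+        # The truth climatology is used for this back-transformation so that
+        # the calibrated forecast follows the observed climate rather than the
+        # model climate (Dabernig et al., 2017).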
+ self.transform_anomalies_to_original_units( + truth_mean=truth_mean, + truth_sd=truth_sd, + location_parameter=location_parameter, + scale_parameter=scale_parameter, + forecast=forecast, + input_forecast_type=input_forecast_type, + ) + + # Generate output in desired format from distribution. + self.distribution = { + "name": get_attribute_from_coefficients(emos_coefficients, "distribution"), + "location": location_parameter, + "scale": scale_parameter, + "shape": get_attribute_from_coefficients( + emos_coefficients, "shape_parameters", optional=True + ), + } + + template = prob_template if prob_template else forecast + result = generate_forecast_from_distribution( + self.distribution, template, self.percentiles, randomise, random_seed + ) + + if input_forecast_type != "probabilities" and not prob_template: + # Enforce that the result is within sensible bounds. + bounds_pairing = get_bounds_of_distribution( + bounds_pairing_key=result.name(), desired_units=result.units + ) + result.data = clip( + result.data, a_min=bounds_pairing[0], a_max=bounds_pairing[1] + ) + + # Enforce correct dtype. + result.data = result.data.astype(dtype=float32) + + return result diff --git a/improver/calibration/utilities.py b/improver/calibration/utilities.py index 68eec73f6d..9c788b4155 100644 --- a/improver/calibration/utilities.py +++ b/improver/calibration/utilities.py @@ -8,10 +8,13 @@ """ +import warnings +from pathlib import Path from typing import List, Set, Tuple, Union import iris import numpy as np +import pandas as pd from iris.coords import DimCoord from iris.cube import Cube, CubeList from numpy import ndarray @@ -472,3 +475,183 @@ def check_data_sufficiency( f"historic forecasts and truth pairs: {proportion_of_nans}." ) raise ValueError(msg) + + +def prepare_cube_no_calibration( + forecast: Cube, + emos_coefficients: CubeList, + ignore_ecc_bounds_exceedance: bool = False, + validity_times: List[str] = None, + percentiles: List[float] = None, + prob_template: Cube = None, +) -> Cube | None: + """ + Function to add appropriate metadata to cubes that cannot be calibrated. If the + forecast can be calibrated then nothing is returned. + + Args: + forecast (iris.cube.Cube): + The forecast to be calibrated. The input format could be either + realizations, probabilities or percentiles. + emos_coefficients (iris.cube.CubeList): + The EMOS coefficients to be applied to the forecast. + ignore_ecc_bounds_exceedance (bool): + If True, where the percentiles exceed the ECC bounds range, + raises a warning rather than an exception. This occurs when the + current forecast is in the form of probabilities and is + converted to percentiles, as part of converting the input + probabilities into realizations. + validity_times (List[str]): + Times at which the forecast must be valid. This must be provided + as a four digit string (HHMM) where the first two digits represent the hour + and the last two digits represent the minutes e.g. 0300 or 0315. If the + forecast provided is at a different validity time then no coefficients + will be applied. + percentiles (List[float]): + The set of percentiles used to create the calibrated forecast. + prob_template (iris.cube.Cube): + Optionally, a cube containing a probability forecast that will be + used as a template when generating probability output when the input + format of the forecast cube is not probabilities i.e. realizations + or percentiles. 
+            If no coefficients are provided and a probability template is
+            provided, the probability template forecast will be returned as
+            the uncalibrated probability forecast.
+
+    Returns:
+        The uncalibrated forecast with updated metadata, or None if the
+        forecast can be calibrated.
+    """
+    from improver.calibration import add_warning_comment, validity_time_check
+    from improver.ensemble_copula_coupling.ensemble_copula_coupling import (
+        ResamplePercentiles,
+    )
+
+    if validity_times is not None and not validity_time_check(forecast, validity_times):
+        if percentiles:
+            # Ensure that a consistent set of percentiles is returned,
+            # regardless of whether calibration is successfully applied.
+            percentiles = [np.float32(p) for p in percentiles]
+            forecast = ResamplePercentiles(
+                ecc_bounds_warning=ignore_ecc_bounds_exceedance
+            )(forecast, percentiles=percentiles)
+        elif prob_template:
+            forecast = prob_template
+        forecast = add_warning_comment(forecast)
+        return forecast
+
+    if emos_coefficients is None:
+        if prob_template:
+            msg = (
+                "There are no coefficients provided for calibration. As a "
+                "probability template has been provided with the aim of "
+                "creating a calibrated probability forecast, the probability "
+                "template will be returned as the uncalibrated probability "
+                "forecast."
+            )
+            warnings.warn(msg)
+            prob_template = add_warning_comment(prob_template)
+            return prob_template
+
+        if percentiles:
+            # Ensure that a consistent set of percentiles is returned,
+            # regardless of whether calibration is successfully applied.
+            percentiles = [np.float32(p) for p in percentiles]
+            forecast = ResamplePercentiles(
+                ecc_bounds_warning=ignore_ecc_bounds_exceedance
+            )(forecast, percentiles=percentiles)
+
+        msg = (
+            "There are no coefficients provided for calibration. The "
+            "uncalibrated forecast will be returned."
+        )
+        warnings.warn(msg)
+
+        forecast = add_warning_comment(forecast)
+        return forecast
+
+
+def convert_parquet_to_cube(
+    forecast: Path,
+    truth: Path,
+    forecast_period: int,
+    cycletime: str,
+    training_length: int,
+    diagnostic: str,
+    percentiles: List[float],
+    experiment: str,
+) -> Union[CubeList, List[None]]:
+    """Function to convert Parquet files containing forecast and truth data
+    into a CubeList for use in calibration.
+
+    Args:
+        forecast (pathlib.Path):
+            The path to a Parquet file containing the historical forecasts
+            to be used for calibration. The expected columns within the
+            Parquet file are: forecast, blend_time, forecast_period,
+            forecast_reference_time, time, wmo_id, percentile, diagnostic,
+            latitude, longitude, period, height, cf_name, units.
+        truth (pathlib.Path):
+            The path to a Parquet file containing the truths to be used
+            for calibration. The expected columns within the
+            Parquet file are: ob_value, time, wmo_id, diagnostic, latitude,
+            longitude and altitude.
+        forecast_period (int):
+            Forecast period to be calibrated in seconds.
+        cycletime (str):
+            Cycletime of a format similar to 20170109T0000Z.
+        training_length (int):
+            Number of days within the training period.
+        diagnostic (str):
+            The name of the diagnostic to be calibrated within the forecast
+            and truth tables. This name is used to filter the Parquet file
+            when reading from disk.
+        percentiles (List[float]):
+            The set of percentiles to be used for estimating coefficients.
+            These should be a set of equally spaced quantiles.
+        experiment (str):
+            A value within the experiment column to select from the forecast
+            table.
+ + Returns: + A CubeList containing the forecast and truth cubes, with the + forecast cube containing the percentiles as an auxiliary coordinate. + """ + from improver.calibration.dataframe_utilities import ( + forecast_and_truth_dataframes_to_cubes, + ) + + # Load forecasts from parquet file filtering by diagnostic and blend_time. + forecast_period_td = pd.Timedelta(int(forecast_period), unit="seconds") + + cycletimes = pd.date_range( + end=pd.Timestamp(cycletime) + - pd.Timedelta(1, unit="days") + - forecast_period_td.floor("D"), + periods=int(training_length), + freq="D", + ) + filters = [[("diagnostic", "==", diagnostic), ("blend_time", "in", cycletimes)]] + forecast_df = pd.read_parquet(forecast, filters=filters) + + # Load truths from parquet file filtering by diagnostic. + filters = [[("diagnostic", "==", diagnostic)]] + truth_df = pd.read_parquet(truth, filters=filters) + if truth_df.empty: + msg = ( + f"The requested filepath {truth} does not contain the " + f"requested contents: {filters}" + ) + raise IOError(msg) + + forecast_cube, truth_cube = forecast_and_truth_dataframes_to_cubes( + forecast_df, + truth_df, + cycletime, + forecast_period, + training_length, + percentiles=percentiles, + experiment=experiment, + ) + if not forecast_cube or not truth_cube: + return [None, None] + else: + return CubeList([forecast_cube, truth_cube]) diff --git a/improver/cli/apply_emos_coefficients.py b/improver/cli/apply_emos_coefficients.py index 848ab3edf8..425ac08087 100644 --- a/improver/cli/apply_emos_coefficients.py +++ b/improver/cli/apply_emos_coefficients.py @@ -107,66 +107,27 @@ def process( The calibrated forecast cube. """ - import warnings - - import numpy as np from improver.calibration import ( - add_warning_comment, split_forecasts_and_coeffs, - validity_time_check, ) from improver.calibration.emos_calibration import ApplyEMOS - from improver.ensemble_copula_coupling.ensemble_copula_coupling import ( - ResamplePercentiles, - ) + from improver.calibration.utilities import prepare_cube_no_calibration (forecast, coefficients, additional_predictors, land_sea_mask, prob_template) = ( split_forecasts_and_coeffs(cubes, land_sea_mask_name) ) - if validity_times is not None and not validity_time_check(forecast, validity_times): - if percentiles: - # Ensure that a consistent set of percentiles are returned, - # regardless of whether EMOS is successfully applied. - percentiles = [np.float32(p) for p in percentiles] - forecast = ResamplePercentiles( - ecc_bounds_warning=ignore_ecc_bounds_exceedance - )(forecast, percentiles=percentiles) - elif prob_template: - forecast = prob_template - forecast = add_warning_comment(forecast) - return forecast - - if coefficients is None: - if prob_template: - msg = ( - "There are no coefficients provided for calibration. As a " - "probability template has been provided with the aim of " - "creating a calibrated probability forecast, the probability " - "template will be returned as the uncalibrated probability " - "forecast." - ) - warnings.warn(msg) - prob_template = add_warning_comment(prob_template) - return prob_template - - if percentiles: - # Ensure that a consistent set of percentiles are returned, - # regardless of whether EMOS is successfully applied. - percentiles = [np.float32(p) for p in percentiles] - forecast = ResamplePercentiles( - ecc_bounds_warning=ignore_ecc_bounds_exceedance - )(forecast, percentiles=percentiles) - - msg = ( - "There are no coefficients provided for calibration. The " - "uncalibrated forecast will be returned." 
-    )
-    warnings.warn(msg)
-
-    forecast = add_warning_comment(forecast)
-    return forecast
+    uncalibrated_forecast = prepare_cube_no_calibration(
+        forecast,
+        coefficients,
+        ignore_ecc_bounds_exceedance=ignore_ecc_bounds_exceedance,
+        validity_times=validity_times,
+        percentiles=percentiles,
+        prob_template=prob_template,
+    )
+    if uncalibrated_forecast is not None:
+        return uncalibrated_forecast
 
     calibration_plugin = ApplyEMOS(percentiles=percentiles)
     result = calibration_plugin(
diff --git a/improver/cli/apply_quantile_regression_random_forest.py b/improver/cli/apply_quantile_regression_random_forest.py
index 765406d6ca..c870302082 100644
--- a/improver/cli/apply_quantile_regression_random_forest.py
+++ b/improver/cli/apply_quantile_regression_random_forest.py
@@ -60,12 +60,12 @@ def process(
         iris.cube.Cube:
             The calibrated forecast cube.
     """
-    from improver.calibration import split_pickle_parquet_and_netcdf
+    from improver.calibration import split_netcdf_parquet_pickle
    from improver.calibration.load_and_apply_quantile_regression_random_forest import (
        PrepareAndApplyQRF,
    )
 
-    cubes, _, qrf_descriptors = split_pickle_parquet_and_netcdf(file_paths)
+    cubes, _, qrf_descriptors = split_netcdf_parquet_pickle(file_paths)
 
     result = PrepareAndApplyQRF(
         feature_config=feature_config,
diff --git a/improver/cli/apply_samos_coefficients.py b/improver/cli/apply_samos_coefficients.py
new file mode 100644
index 0000000000..5011f19030
--- /dev/null
+++ b/improver/cli/apply_samos_coefficients.py
@@ -0,0 +1,184 @@
+#!/usr/bin/env python
+# (C) Crown Copyright, Met Office. All rights reserved.
+#
+# This file is part of 'IMPROVER' and is released under the BSD 3-Clause license.
+# See LICENSE in the root of the repository for full licensing details.
+"""Script to apply Standardised Anomaly Model Output Statistics (SAMOS) calibration."""
+
+from improver import cli
+
+
+@cli.clizefy
+@cli.with_output
+def process(
+    *file_paths: cli.inputpath,
+    gam_features: cli.comma_separated_list,
+    validity_times: cli.comma_separated_list = None,
+    realizations_count: int = None,
+    randomise=False,
+    random_seed: int = None,
+    ignore_ecc_bounds_exceedance=False,
+    tolerate_time_mismatch=False,
+    predictor="mean",
+    percentiles: cli.comma_separated_list = None,
+    unique_site_id_key: str = None,
+):
+    """Apply coefficients for Standardised Anomaly Model Output Statistics (SAMOS).
+
+    The climatological mean and standard deviation of the forecast are predicted
+    from the provided GAMs and used to convert the forecast to anomaly data. The
+    anomaly data is calibrated using the EMOS plugin and the provided
+    coefficients, and the calibrated forecast is then regenerated from the
+    distributional information and written to a cube. If no coefficients are
+    provided, the input forecast is returned unchanged.
+
+    Args:
+        file_paths (cli.inputpath):
+            A list of input paths containing:
+            - Path to a pickle file containing the GAMs to be used. This pickle
+            file contains two lists, each containing two fitted GAMs. The first
+            list contains GAMs for predicting the climatological mean and
+            standard deviation of the historical forecasts; the second list
+            contains GAMs for predicting the climatological mean and standard
+            deviation of the truths.
+            - Path to a NetCDF file containing the forecast to be calibrated.
+            The input forecast could be given as realizations, probabilities or
+            percentiles.
+            - Path to a NetCDF file containing a cube list that includes the
+            coefficients to be used for calibration, if available.
If None, then the input,
+            or probability template if provided, is returned unchanged.
+            - Optionally, paths to additional NetCDF files that will be provided to
+            the EMOS plugin representing static additional predictors. These static
+            additional predictors are expected not to have a time coordinate. These
+            will be identified by their omission from the gam_features list.
+            - Optionally, paths to additional NetCDF files that contain additional
+            features (static predictors) that will be provided to the GAM to help
+            calculate the climatological statistics. The name of the cubes should
+            match one of the names in the gam_features list.
+            - Optionally, path to a NetCDF file containing the land-sea mask. This
+            is used to ensure that only land points are calibrated. If no land-sea
+            mask is provided, all points will be calibrated.
+            - Optionally, path to a NetCDF file containing a probability forecast
+            that will be used as a template when generating probability output when
+            the input format of the forecast cube is not probabilities, i.e.
+            realizations or percentiles. If no coefficients are provided and a
+            probability template is provided, the probability template forecast will
+            be returned as the uncalibrated probability forecast.
+
+        gam_features (list of str):
+            A list of the names of the cubes that will be used as additional
+            features in the GAM. Additionally, the name of any coordinates
+            that are to be used as features in the GAM.
+        validity_times (List[str]):
+            Times at which the forecast must be valid. This must be provided
+            as a four digit string (HHMM) where the first two digits represent the hour
+            and the last two digits represent the minutes e.g. 0300 or 0315. If the
+            forecast provided is at a different validity time then no coefficients
+            will be applied.
+        realizations_count (int):
+            Option to specify the number of ensemble realizations that will be
+            created from probabilities or percentiles when applying the SAMOS
+            coefficients.
+        randomise (bool):
+            Option to reorder the post-processed forecasts randomly. If not
+            set, the ordering of the raw ensemble is used. This option is
+            only valid when the input format is realizations.
+        random_seed (int):
+            Option to specify a value for the random seed for testing
+            purposes, otherwise the default random seed behaviour is utilised.
+            The random seed is used in the generation of the random numbers
+            used for either the randomise option to order the input
+            percentiles randomly, rather than use the ordering from the raw
+            ensemble, or for splitting tied values within the raw ensemble,
+            so that the values from the input percentiles can be ordered to
+            match the raw ensemble.
+        ignore_ecc_bounds_exceedance (bool):
+            If True, where the percentiles exceed the ECC bounds range, a
+            warning is raised rather than an exception. This occurs when the
+            current forecast is in the form of probabilities and is
+            converted to percentiles, as part of converting the input
+            probabilities into realizations.
+        tolerate_time_mismatch (bool):
+            If True, tolerate a mismatch in validity time and forecast period
+            for coefficients vs forecasts. Use with caution!
+        predictor (str):
+            String to specify the form of the predictor used to calculate
+            the location parameter when estimating the EMOS coefficients.
+            Currently, the ensemble mean ("mean") and the ensemble
+            realizations ("realizations") are supported as the predictors.
+        percentiles (List[float]):
+            The set of percentiles used to create the calibrated forecast.
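+            e.g. "10,20,30,40,50,60,70,80,90", as used in the acceptance
+            tests below.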
+ unique_site_id_key (str): + If working with spot data and available, the name of the coordinate + in the input cubes that contains unique site IDs, e.g. "wmo_id" if + all sites have a valid wmo_id. + + Returns: + iris.cube.Cube: + The calibrated forecast cube. + """ + import scipy.sparse + + from improver.calibration import ( + split_cubes_for_samos, + split_netcdf_parquet_pickle, + ) + from improver.calibration.samos_calibration import ApplySAMOS + from improver.calibration.utilities import prepare_cube_no_calibration + + # monkey-patch to 'tweak' scipy to prevent errors occurring. + def to_array(self): + return self.toarray() + + scipy.sparse.spmatrix.A = property(to_array) + + # Split the input paths into cubes and pickles + cubes, _, gams = split_netcdf_parquet_pickle(file_paths) + + # Split the cubes into forecast cubes, along with any additional fields + # provided for the GAMs and EMOS, and the coefficients to be used for calibration + ( + forecast, + _, + gam_additional_fields, + emos_coefficients, + emos_additional_fields, + prob_template, + ) = split_cubes_for_samos( + cubes=cubes, + gam_features=gam_features, + truth_attribute=None, + expect_emos_coeffs=True, + expect_emos_fields=True, + ) + + uncalibrated_forecast = prepare_cube_no_calibration( + forecast, + emos_coefficients, + ignore_ecc_bounds_exceedance=ignore_ecc_bounds_exceedance, + validity_times=validity_times, + percentiles=percentiles, + prob_template=prob_template, + ) + + if uncalibrated_forecast is not None: + return uncalibrated_forecast + + plugin = ApplySAMOS(percentiles=percentiles, unique_site_id_key=unique_site_id_key) + result = plugin.process( + forecast=forecast, + forecast_gams=gams[0], + truth_gams=gams[1], + gam_features=gam_features, + emos_coefficients=emos_coefficients, + gam_additional_fields=gam_additional_fields, + emos_additional_fields=emos_additional_fields, + prob_template=prob_template, + realizations_count=realizations_count, + ignore_ecc_bounds=ignore_ecc_bounds_exceedance, + tolerate_time_mismatch=tolerate_time_mismatch, + predictor=predictor, + randomise=randomise, + random_seed=random_seed, + ) + + return result diff --git a/improver/cli/estimate_emos_coefficients_from_table.py b/improver/cli/estimate_emos_coefficients_from_table.py index 817a87803f..1c5e2873da 100755 --- a/improver/cli/estimate_emos_coefficients_from_table.py +++ b/improver/cli/estimate_emos_coefficients_from_table.py @@ -42,7 +42,7 @@ def process( Args: forecast (pathlib.Path): The path to a Parquet file containing the historical forecasts - to be used for calibration.The expected columns within the + to be used for calibration. The expected columns within the Parquet file are: forecast, blend_time, forecast_period, forecast_reference_time, time, wmo_id, percentile, diagnostic, latitude, longitude, period, height, cf_name, units. @@ -88,7 +88,7 @@ def process( predictor (str): String to specify the form of the predictor used to calculate the location parameter when estimating the EMOS coefficients. - Currently the ensemble mean ("mean") and the ensemble realizations + Currently, the ensemble mean ("mean") and the ensemble realizations ("realizations") are supported as options. tolerance (float): The tolerance for the Continuous Ranked Probability Score (CRPS) @@ -115,40 +115,21 @@ def process( CubeList containing the coefficients estimated using EMOS. Each coefficient is stored in a separate cube. 
""" - import iris - import pandas as pd from iris.cube import CubeList - from improver.calibration import get_training_period_cycles - from improver.calibration.dataframe_utilities import ( - forecast_and_truth_dataframes_to_cubes, - ) from improver.calibration.emos_calibration import ( EstimateCoefficientsForEnsembleCalibration, ) + from improver.calibration.utilities import convert_parquet_to_cube - # Load forecasts from parquet file filtering by diagnostic and blend_time. - cycletimes = get_training_period_cycles(cycletime, forecast_period, training_length) - filters = [[("diagnostic", "==", diagnostic), ("blend_time", "in", cycletimes)]] - forecast_df = pd.read_parquet(forecast, filters=filters) - - # Load truths from parquet file filtering by diagnostic. - filters = [[("diagnostic", "==", diagnostic)]] - truth_df = pd.read_parquet(truth, filters=filters) - if truth_df.empty: - msg = ( - f"The requested filepath {truth} does not contain the " - f"requested contents: {filters}" - ) - raise IOError(msg) - - forecast_cube, truth_cube = forecast_and_truth_dataframes_to_cubes( - forecast_df, - truth_df, - cycletime, - forecast_period, - training_length, + forecast_cube, truth_cube = convert_parquet_to_cube( + forecast, + truth, + forecast_period=forecast_period, + cycletime=cycletime, + training_length=training_length, + diagnostic=diagnostic, percentiles=percentiles, experiment=experiment, ) diff --git a/improver/cli/estimate_samos_coefficients.py b/improver/cli/estimate_samos_coefficients.py new file mode 100755 index 0000000000..c2fc07f6c2 --- /dev/null +++ b/improver/cli/estimate_samos_coefficients.py @@ -0,0 +1,158 @@ +#!/usr/bin/env python +# (C) Crown Copyright, Met Office. All rights reserved. +# +# This file is part of 'IMPROVER' and is released under the BSD 3-Clause license. +# See LICENSE in the root of the repository for full licensing details. +"""CLI to estimate the Ensemble Model Output Statistics (EMOS) coefficients for +Standardized Anomaly Model Output Statistics (SAMOS).""" + +from improver import cli + + +@cli.clizefy +@cli.with_output +def process( + *file_paths: cli.inputpath, + truth_attribute: str, + gam_features: cli.comma_separated_list, + use_default_initial_guess=False, + units=None, + predictor="mean", + tolerance: float = 0.02, + max_iterations: int = 1000, + unique_site_id_key: str = "wmo_id", +): + """Estimate EMOS coefficients for use with SAMOS. + + Loads in arguments for estimating coefficients for Ensemble Model + Output Statistics (EMOS), otherwise known as Non-homogeneous Gaussian + Regression (NGR). Two sources of input data must be provided: historical + forecasts and historical truth data (to use in calibration). + The estimated coefficients are output as a cube. + + Args: + file_paths (cli.inputpath): + A list of input paths containing: + - Path to a pickle file containing the GAMs to be used. This pickle + file contains two lists, each containing two fitted GAMs. The first list + contains GAMS for predicting each of the climatological mean and + standard deviation of the historical forecasts. The second list contains + GAMS for predicting each of the climatological mean and standard + deviation of the truths. + - Paths to NetCDF files containing the historical forecasts and + corresponding truths used for calibration. They must have the same + diagnostic name and will be separated based on the provided truth + attribute. 
+            - Optionally, paths to additional NetCDF files that will be provided to
+            the EMOS plugin representing static additional predictors. These static
+            additional predictors are expected not to have a time coordinate. These
+            will be identified by their omission from the gam_features list.
+            - Optionally, paths to additional NetCDF files that contain additional
+            features (static predictors) that will be provided to the GAM to help
+            calculate the climatological statistics. The name of the cubes should
+            match one of the names in the gam_features list.
+        truth_attribute (str):
+            An attribute and its value in the format of "attribute=value",
+            which must be present on historical truth cubes.
+        gam_features (list of str):
+            A list of the names of the cubes that will be used as additional
+            features in the GAM. Additionally, the name of any coordinates
+            that are to be used as features in the GAM.
+        use_default_initial_guess (bool):
+            If True, use the default initial guess. The default initial guess
+            assumes no adjustments are required to the initial choice of
+            predictor to generate the calibrated distribution. This means
+            coefficients of 1 for the multiplicative coefficients and 0 for
+            the additive coefficients. If False, the initial guess is computed.
+        units (str):
+            The units that calibration should be undertaken in. The historical
+            forecast and truth will be converted as required.
+        predictor (str):
+            String to specify the form of the predictor used to calculate the
+            location parameter when estimating the EMOS coefficients.
+            Currently, the ensemble mean ("mean") and the ensemble realizations
+            ("realizations") are supported as options.
+        tolerance (float):
+            The tolerance for the Continuous Ranked Probability Score (CRPS)
+            calculated by the minimisation. Once multiple iterations result in
+            a CRPS equal to the same value within the specified tolerance, the
+            minimisation will terminate.
+        max_iterations (int):
+            The maximum number of iterations allowed until the minimisation has
+            converged to a stable solution. If the maximum number of iterations
+            is reached but the minimisation has not yet converged to a stable
+            solution, then the available solution is used anyway, and a warning
+            is raised. If the predictor is "realizations", then the number of
+            iterations may require increasing, as there will be more
+            coefficients to solve.
+        unique_site_id_key (str):
+            If working with spot data and available, the name of the coordinate
+            in the input cubes that contains unique site IDs, e.g. "wmo_id" if
+            all sites have a valid wmo_id. For estimation the default is "wmo_id"
+            as we expect to be including observation data.
+
+    Returns:
+        iris.cube.CubeList:
+            CubeList containing the coefficients estimated using EMOS. Each
+            coefficient is stored in a separate cube.
+    """
+    import scipy.sparse
+
+    from improver.calibration import (
+        split_cubes_for_samos,
+        split_netcdf_parquet_pickle,
+    )
+    from improver.calibration.samos_calibration import TrainEMOSForSAMOS
+
+    # monkey-patch to 'tweak' scipy to prevent errors occurring
+    def to_array(self):
+        return self.toarray()
+
+    scipy.sparse.spmatrix.A = property(to_array)
+
+    # Split the input paths into cubes and pickles
+    cubes, _, gams = split_netcdf_parquet_pickle(file_paths)
+
+    # Split the cubes into forecast and truth cubes, along with any additional fields
+    # provided for the GAMs and EMOS.
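+    # split_cubes_for_samos returns a fixed six-element tuple: (forecast,
+    # truth, gam_additional_fields, emos_coefficients, emos_additional_fields,
+    # prob_template). The coefficients and probability template slots are not
+    # needed when estimating coefficients, so they are discarded with "_".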
+    (
+        forecast,
+        truth,
+        gam_additional_fields,
+        _,
+        emos_additional_fields,
+        _,
+    ) = split_cubes_for_samos(
+        cubes=cubes,
+        gam_features=gam_features,
+        truth_attribute=truth_attribute,
+        expect_emos_coeffs=False,
+        expect_emos_fields=True,
+    )
+
+    if forecast is None or truth is None:
+        return
+
+    # Train EMOS coefficients for the SAMOS model.
+    emos_kwargs = {
+        "use_default_initial_guess": use_default_initial_guess,
+        "desired_units": units,
+        "predictor": predictor,
+        "tolerance": tolerance,
+        "max_iterations": max_iterations,
+    }
+
+    plugin = TrainEMOSForSAMOS(
+        distribution="norm",
+        emos_kwargs=emos_kwargs,
+        unique_site_id_key=unique_site_id_key,
+    )
+    return plugin(
+        historic_forecasts=forecast,
+        truths=truth,
+        forecast_gams=gams[0],
+        truth_gams=gams[1],
+        gam_features=gam_features,
+        gam_additional_fields=gam_additional_fields,
+        emos_additional_fields=emos_additional_fields,
+    )
diff --git a/improver/cli/estimate_samos_coefficients_from_table.py b/improver/cli/estimate_samos_coefficients_from_table.py
new file mode 100755
index 0000000000..60a607ffe8
--- /dev/null
+++ b/improver/cli/estimate_samos_coefficients_from_table.py
@@ -0,0 +1,177 @@
+#!/usr/bin/env python
+# (C) Crown Copyright, Met Office. All rights reserved.
+#
+# This file is part of 'IMPROVER' and is released under the BSD 3-Clause license.
+# See LICENSE in the root of the repository for full licensing details.
+"""CLI to estimate the Ensemble Model Output Statistics (EMOS) coefficients for
+Standardized Anomaly Model Output Statistics (SAMOS)."""
+
+from improver import cli
+
+
+@cli.clizefy
+@cli.with_output
+def process(
+    *file_paths: cli.inputpath,
+    gam_features: cli.comma_separated_list,
+    use_default_initial_guess=False,
+    units=None,
+    predictor="mean",
+    tolerance: float = 0.02,
+    max_iterations: int = 1000,
+    percentiles: cli.comma_separated_list = None,
+    experiment: str = None,
+    forecast_period: int,
+    training_length: int,
+    diagnostic: str,
+    cycletime: str,
+    unique_site_id_key: str = "wmo_id",
+):
+    """Estimate EMOS coefficients for use with SAMOS.
+
+    Loads in arguments for estimating coefficients for Ensemble Model
+    Output Statistics (EMOS), otherwise known as Non-homogeneous Gaussian
+    Regression (NGR). Two sources of input data must be provided: historical
+    forecasts and historical truth data (to use in calibration).
+    The estimated coefficients are output as a cube.
+
+    Args:
+        file_paths (cli.inputpath):
+            A list of input paths containing:
+            - Path to a pickle file containing the GAMs to be used. This pickle
+            file contains two lists, each containing two fitted GAMs. The first list
+            contains GAMs for predicting each of the climatological mean and standard
+            deviation of the historical forecasts. The second list contains GAMs for
+            predicting each of the climatological mean and standard deviation of the
+            truths.
+            - The path to a Parquet file containing the historical forecasts
+            to be used for calibration. The expected columns within the
+            Parquet file are: forecast, blend_time, forecast_period,
+            forecast_reference_time, time, wmo_id, percentile, diagnostic,
+            latitude, longitude, period, height, cf_name, units.
+            - The path to a Parquet file containing the truths to be used
+            for calibration. The expected columns within the
+            Parquet file are: ob_value, time, wmo_id, diagnostic, latitude,
+            longitude and altitude.
+            - Optionally, paths to additional NetCDF files that contain additional
+            features (static predictors) that will be provided when estimating the
+            SAMOS coefficients. The name of all cubes in this list must be in the
+            gam_features list.
+
+        gam_features (list of str):
+            A list of the names of the cubes that will be used as additional
+            features in the GAM. Additionally, the name of any coordinates
+            that are to be used as features in the GAM.
+        use_default_initial_guess (bool):
+            If True, use the default initial guess. The default initial guess
+            assumes no adjustments are required to the initial choice of
+            predictor to generate the calibrated distribution. This means
+            coefficients of 1 for the multiplicative coefficients and 0 for
+            the additive coefficients. If False, the initial guess is computed.
+        units (str):
+            The units that calibration should be undertaken in. The historical
+            forecast and truth will be converted as required.
+        predictor (str):
+            String to specify the form of the predictor used to calculate the
+            location parameter when estimating the EMOS coefficients.
+            Currently, the ensemble mean ("mean") and the ensemble realizations
+            ("realizations") are supported as options.
+        tolerance (float):
+            The tolerance for the Continuous Ranked Probability Score (CRPS)
+            calculated by the minimisation. Once multiple iterations result in
+            a CRPS equal to the same value within the specified tolerance, the
+            minimisation will terminate.
+        max_iterations (int):
+            The maximum number of iterations allowed until the minimisation has
+            converged to a stable solution. If the maximum number of iterations
+            is reached but the minimisation has not yet converged to a stable
+            solution, then the available solution is used anyway, and a warning
+            is raised. If the predictor is "realizations", then the number of
+            iterations may require increasing, as there will be more
+            coefficients to solve.
+        percentiles (List[float]):
+            The set of percentiles to be used for estimating EMOS coefficients.
+            These should be a set of equally spaced quantiles.
+        experiment (str):
+            A value within the experiment column to select from the forecast
+            table.
+        forecast_period (int):
+            Forecast period to be calibrated in seconds.
+        training_length (int):
+            Number of days within the training period.
+        diagnostic (str):
+            The name of the diagnostic to be calibrated within the forecast
+            and truth tables. This name is used to filter the Parquet file
+            when reading from disk.
+        cycletime (str):
+            Cycletime of a format similar to 20170109T0000Z.
+        unique_site_id_key (str):
+            If working with spot data and available, the name of the coordinate
+            in the input cubes that contains unique site IDs, e.g. "wmo_id" if
+            all sites have a valid wmo_id. For estimation the default is "wmo_id"
+            as we expect to be including observation data.
+
+    Returns:
+        iris.cube.CubeList or None:
+            CubeList containing the coefficients estimated using EMOS. Each
+            coefficient is stored in a separate cube. None if a forecast or
+            truth cube cannot be created from the parquet table.
+    """
+    import scipy.sparse
+
+    from improver.calibration import (
+        identify_parquet_type,
+        split_netcdf_parquet_pickle,
+    )
+    from improver.calibration.samos_calibration import TrainEMOSForSAMOS
+    from improver.calibration.utilities import convert_parquet_to_cube
+
+    # monkey-patch to 'tweak' scipy to prevent errors occurring.
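+    # The dense-array ``.A`` property appears to have been deprecated and
+    # later removed from scipy sparse matrices in recent SciPy releases;
+    # pygam is assumed to still access it, so it is restored as a property.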
+ def to_array(self): + return self.toarray() + + scipy.sparse.spmatrix.A = property(to_array) + + # Split the input paths into cubes and pickles. + samos_additional_predictors, parquets, gams = split_netcdf_parquet_pickle( + file_paths + ) + # Determine which parquet path provides truths and which historic forecasts. + forecast, truth = identify_parquet_type(parquets) + + forecast_cube, truth_cube = convert_parquet_to_cube( + forecast, + truth, + diagnostic=diagnostic, + cycletime=cycletime, + forecast_period=forecast_period, + training_length=training_length, + percentiles=percentiles, + experiment=experiment, + ) + + if not forecast_cube or not truth_cube: + return + + # Train emos coefficients for the SAMOS model. + emos_kwargs = { + "use_default_initial_guess": use_default_initial_guess, + "desired_units": units, + "predictor": predictor, + "tolerance": tolerance, + "max_iterations": max_iterations, + } + + plugin = TrainEMOSForSAMOS( + distribution="norm", + emos_kwargs=emos_kwargs, + unique_site_id_key=unique_site_id_key, + ) + return plugin( + historic_forecasts=forecast_cube, + truths=truth_cube, + forecast_gams=gams[0], + truth_gams=gams[1], + gam_features=gam_features, + gam_additional_fields=samos_additional_predictors, + ) diff --git a/improver/cli/estimate_samos_gams.py b/improver/cli/estimate_samos_gams.py new file mode 100755 index 0000000000..85e31cf353 --- /dev/null +++ b/improver/cli/estimate_samos_gams.py @@ -0,0 +1,118 @@ +#!/usr/bin/env python +# (C) Crown Copyright, Met Office. All rights reserved. +# +# This file is part of 'IMPROVER' and is released under the BSD 3-Clause license. +# See LICENSE in the root of the repository for full licensing details. +"""CLI to estimate the Generalized Additive Model (GAM) for Standardized Anomaly Model +Output Statistics (SAMOS).""" + +from improver import cli + + +@cli.clizefy +@cli.with_output +def process( + *cubes: cli.inputcube, + truth_attribute: str, + gam_features: cli.comma_separated_list, + model_specification: cli.inputjson, + max_iterations: int = 100, + tolerance: float = 0.0001, + distribution: str = "normal", + link: str = "identity", + fit_intercept: bool = True, + unique_site_id_key: str = "wmo_id", +): + """Estimate Generalized Additive Model (GAM) for SAMOS. + + Args: + cubes (list of iris.cube.Cube): + A list of cubes containing the historical forecasts and + corresponding truth used for calibration. They must have the same + cube name and will be separated based on the truth attribute. The + list may also contain additional features (static predictors) that + will be provided when estimating the GAM. + truth_attribute (str): + An attribute and its value in the format of "attribute=value", + which must be present on historical truth cubes. + gam_features (list of str): + A list of the names of the cubes that will be used as additional + features in the GAM. + model_specification (dict): + A list containing three items (in order): + 1. a string containing a single pyGAM term; one of 'l' (linear), + 's' (spline), 'te' (tensor), or 'f' (factor) + 2. a list of integers which correspond to the features to be + included in that term + 3. a dictionary of kwargs to be included when defining the term + max_iterations (int): + The maximum number of iterations to use when estimating the GAM + coefficients. + tolerance (float): + The tolerance for the stopping criteria. + distribution (str): + The distribution to be used in the model. Valid options are normal, binomial, + poisson, gamma, inv-gauss. 
+        link (str):
+            The link function to be used in the model. Valid options are
+            identity, logit, inverse, log or inverse-squared.
+        fit_intercept (bool):
+            Whether to include an intercept term in the model. Default is True.
+        unique_site_id_key (str):
+            If working with spot data and available, the name of the coordinate
+            in the input cubes that contains unique site IDs, e.g. "wmo_id" if
+            all sites have a valid wmo_id. For GAM estimation the default is
+            "wmo_id" as we expect to have a training data set comprising matched
+            obs and forecast sites.
+
+    Returns:
+        List:
+            A list containing the fitted GAMs for the forecast and truth
+            cubes, in that order.
+    """
+
+    from improver.calibration import split_cubes_for_samos
+    from improver.calibration.samos_calibration import TrainGAMsForSAMOS
+
+    # Split the cubes into forecast and truth cubes, along with any additional fields
+    # provided for the GAMs.
+    (
+        forecast,
+        truth,
+        gam_additional_fields,
+        _,
+        _,
+        _,
+    ) = split_cubes_for_samos(
+        cubes=cubes,
+        gam_features=gam_features,
+        truth_attribute=truth_attribute,
+        expect_emos_coeffs=False,
+        expect_emos_fields=False,
+    )
+
+    if forecast is None or truth is None:
+        return
+
+    plugin = TrainGAMsForSAMOS(
+        model_specification=model_specification,
+        max_iter=max_iterations,
+        tol=tolerance,
+        distribution=distribution,
+        link=link,
+        fit_intercept=fit_intercept,
+        unique_site_id_key=unique_site_id_key,
+    )
+
+    truth_gams = plugin.process(
+        input_cube=truth,
+        features=gam_features,
+        additional_fields=gam_additional_fields,
+    )
+
+    forecast_gams = plugin.process(
+        input_cube=forecast,
+        features=gam_features,
+        additional_fields=gam_additional_fields,
+    )
+
+    return [forecast_gams, truth_gams]
diff --git a/improver/cli/estimate_samos_gams_from_table.py b/improver/cli/estimate_samos_gams_from_table.py
new file mode 100755
index 0000000000..3219d35a88
--- /dev/null
+++ b/improver/cli/estimate_samos_gams_from_table.py
@@ -0,0 +1,161 @@
+#!/usr/bin/env python
+# (C) Crown Copyright, Met Office. All rights reserved.
+#
+# This file is part of 'IMPROVER' and is released under the BSD 3-Clause license.
+# See LICENSE in the root of the repository for full licensing details.
+"""CLI to estimate the Generalized Additive Model (GAM) for Standardized Anomaly Model
+Output Statistics (SAMOS)."""
+
+from improver import cli
+
+
+@cli.clizefy
+@cli.with_output
+def process(
+    *file_paths: cli.inputpath,
+    gam_features: cli.comma_separated_list,
+    model_specification: cli.inputjson,
+    tolerance: float = 0.02,
+    max_iterations: int = 1000,
+    percentiles: cli.comma_separated_list = None,
+    experiment: str = None,
+    forecast_period: int,
+    training_length: int,
+    diagnostic: str,
+    cycletime: str,
+    distribution: str = "normal",
+    link: str = "identity",
+    fit_intercept: bool = True,
+    unique_site_id_key: str = "wmo_id",
+):
+    """Estimate Generalized Additive Model (GAM) for SAMOS.
+
+    Args:
+        file_paths (cli.inputpath):
+            A list of input paths containing:
+            - The path to a Parquet file containing the historical forecasts
+            to be used for calibration. The expected columns within the
+            Parquet file are: forecast, blend_time, forecast_period,
+            forecast_reference_time, time, wmo_id, percentile, diagnostic,
+            latitude, longitude, period, height, cf_name, units.
+            - The path to a Parquet file containing the truths to be used
+            for calibration. The expected columns within the
+            Parquet file are: ob_value, time, wmo_id, diagnostic, latitude,
+            longitude and altitude.
+            - Optionally, paths to additional NetCDF files that contain additional
+            features (static predictors) that will be provided when estimating the
+            GAM.
+        gam_features (list of str):
+            A list of the names of the cubes that will be used as additional
+            features in the GAM. Additionally, the name of any coordinates
+            that are to be used as features in the GAM.
+        model_specification (dict):
+            A list containing three items (in order):
+            1. a string containing a single pyGAM term; one of 'l' (linear),
+            's' (spline), 'te' (tensor), or 'f' (factor)
+            2. a list of integers which correspond to the features to be
+            included in that term
+            3. a dictionary of kwargs to be included when defining the term
+        tolerance (float):
+            The tolerance for the stopping criteria used when estimating the
+            GAM coefficients.
+        max_iterations (int):
+            The maximum number of iterations to use when estimating the GAM
+            coefficients.
+        percentiles (List[float]):
+            The set of percentiles to be extracted from the forecast table
+            when creating the forecast cube. These should be a set of equally
+            spaced quantiles.
+        experiment (str):
+            A value within the experiment column to select from the forecast
+            table.
+        forecast_period (int):
+            Forecast period to be calibrated in seconds.
+        training_length (int):
+            Number of days within the training period.
+        diagnostic (str):
+            The name of the diagnostic to be calibrated within the forecast
+            and truth tables. This name is used to filter the Parquet file
+            when reading from disk.
+        cycletime (str):
+            Cycletime of a format similar to 20170109T0000Z.
+        distribution (str):
+            The distribution to be used in the model. Valid options are
+            normal, binomial, poisson, gamma, inv-gauss.
+        link (str):
+            The link function to be used in the model. Valid options are
+            identity, logit, inverse, log or inverse-squared.
+        fit_intercept (bool):
+            Whether to include an intercept term in the model. Default is True.
+        unique_site_id_key (str):
+            If working with spot data and available, the name of the coordinate
+            in the input cubes that contains unique site IDs, e.g. "wmo_id" if
+            all sites have a valid wmo_id. For GAM estimation the default is
+            "wmo_id" as we expect to have a training data set comprising matched
+            obs and forecast sites.
+
+    Returns:
+        (list of GAM models):
+            A list containing two lists of two fitted GAMs. The first list
+            contains two fitted GAMs, one for predicting the climatological mean
+            of the historical forecasts and the second predicting the
+            climatological standard deviation. The second list contains two
+            fitted GAMs, one for predicting the climatological mean of the truths
+            and the second predicting the climatological standard deviation.
+    """
+
+    from improver.calibration import (
+        identify_parquet_type,
+        split_netcdf_parquet_pickle,
+    )
+    from improver.calibration.samos_calibration import TrainGAMsForSAMOS
+    from improver.calibration.utilities import convert_parquet_to_cube
+
+    # Split the input paths into cubes and pickles.
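+    # Any pickle input is ignored here (the "_" in the unpacking below), as
+    # this CLI fits the GAMs itself rather than loading pre-fitted ones.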
+ additional_predictors, parquets, _ = split_netcdf_parquet_pickle(file_paths) + # Determine which parquet path provides truths and which historic forecasts. + forecast, truth = identify_parquet_type(parquets) + + forecast_cube, truth_cube = convert_parquet_to_cube( + forecast, + truth, + diagnostic=diagnostic, + cycletime=cycletime, + forecast_period=forecast_period, + training_length=training_length, + percentiles=percentiles, + experiment=experiment, + ) + + if not forecast_cube or not truth_cube: + return + + plugin = TrainGAMsForSAMOS( + model_specification=model_specification, + max_iter=max_iterations, + tol=tolerance, + distribution=distribution, + link=link, + fit_intercept=fit_intercept, + unique_site_id_key=unique_site_id_key, + ) + + truth_gams = plugin.process( + input_cube=truth_cube, + features=gam_features, + additional_fields=additional_predictors, + ) + + forecast_gams = plugin.process( + input_cube=forecast_cube, + features=gam_features, + additional_fields=additional_predictors, + ) + + return [forecast_gams, truth_gams] diff --git a/improver/ensemble_copula_coupling/ensemble_copula_coupling.py b/improver/ensemble_copula_coupling/ensemble_copula_coupling.py index 20b989ceab..833b48d944 100644 --- a/improver/ensemble_copula_coupling/ensemble_copula_coupling.py +++ b/improver/ensemble_copula_coupling/ensemble_copula_coupling.py @@ -353,12 +353,13 @@ def _interpolate_percentiles( bounds_pairing = get_bounds_of_distribution( forecast_at_percentiles.name(), cube_units ) - (original_percentiles, forecast_at_reshaped_percentiles) = ( - self._add_bounds_to_percentiles_and_forecast_at_percentiles( - original_percentiles, - forecast_at_reshaped_percentiles, - bounds_pairing, - ) + ( + original_percentiles, + forecast_at_reshaped_percentiles, + ) = self._add_bounds_to_percentiles_and_forecast_at_percentiles( + original_percentiles, + forecast_at_reshaped_percentiles, + bounds_pairing, ) forecast_at_interpolated_percentiles = interpolate_multiple_rows_same_x( @@ -658,10 +659,11 @@ def _probabilities_to_percentiles( ) cube_units = forecast_probabilities.coord(threshold_coord.name()).units bounds_pairing = get_bounds_of_distribution(phenom_name, cube_units) - (threshold_points, probabilities_for_cdf) = ( - self._add_bounds_to_thresholds_and_probabilities( - threshold_points, probabilities_for_cdf, bounds_pairing - ) + ( + threshold_points, + probabilities_for_cdf, + ) = self._add_bounds_to_thresholds_and_probabilities( + threshold_points, probabilities_for_cdf, bounds_pairing ) if np.any(np.diff(probabilities_for_cdf) < 0): @@ -1001,10 +1003,10 @@ def _location_and_scale_parameters_to_percentiles( result[index, :] = percentile_method.ppf(percentile_list) # If percent point function (PPF) returns NaNs, fill in # mean instead of NaN values. NaN will only be generated if the - # scale parameter (standard deviation) is zero. Therefore, if the - # scale parameter (standard deviation) is zero, the mean value is + # scale parameter (standard deviation) is zero or negative. Therefore, if the + # scale parameter (standard deviation) is zero or negative, the mean value is # used for all gridpoints with a NaN. 
-        if np.any(scale_data == 0):
+        if np.any(scale_data <= 0):
             nan_index = np.argwhere(np.isnan(result[index, :]))
             result[index, nan_index] = location_data[nan_index]
         if np.any(np.isnan(result)):
@@ -1393,14 +1395,15 @@ def rank_ecc(
     Raises:
         ValueError: tie_break is not either 'random' or 'realization'
     """
+    if random_seed is not None:
+        random_seed = int(random_seed)
+    random_seed = np.random.RandomState(random_seed)
+
     results = iris.cube.CubeList([])
     for rawfc, calfc in zip(
         raw_forecast_realizations.slices_over("time"),
         post_processed_forecast_percentiles.slices_over("time"),
     ):
-        if random_seed is not None:
-            random_seed = int(random_seed)
-        random_seed = np.random.RandomState(random_seed)
         if random_ordering:
             random_data = random_seed.rand(*rawfc.data.shape)
             # Returns the indices that would sort the array.
diff --git a/improver/utilities/cli_utilities.py b/improver/utilities/cli_utilities.py
index cdc2937b6d..3abad560c9 100644
--- a/improver/utilities/cli_utilities.py
+++ b/improver/utilities/cli_utilities.py
@@ -7,6 +7,8 @@
 import json
 from typing import Dict, Optional
 
+import joblib
+
 
 def load_json_or_none(file_path: Optional[str]) -> Optional[Dict]:
     """If there is a path, runs json.load and returns it. Else returns None.
@@ -16,9 +18,7 @@ def load_json_or_none(file_path: Optional[str]) -> Optional[Dict]:
             File path to the json file to load.
 
     Returns:
-        A dictionary loaded from a json file.
-        or
-        None
+        A dictionary loaded from a json file, or None.
     """
     metadata_dict = None
     if file_path:
@@ -26,3 +26,19 @@ def load_json_or_none(file_path: Optional[str]) -> Optional[Dict]:
         with open(file_path, "r") as input_file:
             metadata_dict = json.load(input_file)
     return metadata_dict
+
+
+def load_pickle_or_none(file_path: Optional[str]) -> Optional[list]:
+    """If there is a path, loads the pickled object and returns it. Else returns None.
+
+    Args:
+        file_path:
+            File path to the pickled object file to load.
+
+    Returns:
+        The loaded pickle contents, typically a list of objects, or None.
+    """
+    if file_path:
+        with open(file_path, "rb") as input_file:
+            return joblib.load(input_file)
+    return None
diff --git a/improver/utilities/compare.py b/improver/utilities/compare.py
index ce48145ad3..64da974fea 100644
--- a/improver/utilities/compare.py
+++ b/improver/utilities/compare.py
@@ -373,13 +373,43 @@ def compare_data(
         reporter(f"different data {name} - {numpy_err_message}")
 
 
+def compare_objects(
+    actual_var: PathLike, desired_var: PathLike, reporter: Callable[[str], None]
+) -> None:
+    """
+    Compare two pickled objects. This is not a complete comparison as two
+    objects may have the same string representation but be different
+    objects.
+
+    Args:
+        actual_var: Path to the pickled object produced by test run.
+        desired_var: Path to the pickled object considered good.
+        reporter: Callback function for reporting differences.
+    """
+    import joblib
+
+    try:
+        with open(actual_var, "rb") as f:
+            actual_data = joblib.load(f)
+    except OSError as exc:
+        reporter(str(exc))
+        return
+    try:
+        with open(desired_var, "rb") as f:
+            desired_data = joblib.load(f)
+    except OSError as exc:
+        reporter(str(exc))
+        return
+    if str(actual_data) != str(desired_data):
+        reporter(f"Different data found in {actual_var} and {desired_var}.")
+
+
 def compare_pickled_forest(
     output_path: PathLike,
     kgo_path: PathLike,
     reporter: Optional[Callable[[str], None]] = None,
 ):
     """Load a pickled forest (e.g. a Random Forest) and compare its contents.
- Args: output_path: data file produced by test run kgo_path: data file considered good e.g. KGO diff --git a/improver/utilities/generalized_additive_models.py b/improver/utilities/generalized_additive_models.py index db9932b84a..52fdeafcee 100644 --- a/improver/utilities/generalized_additive_models.py +++ b/improver/utilities/generalized_additive_models.py @@ -131,6 +131,10 @@ def to_array(self): # Import from pygam here to minimize dependencies from pygam import GAM + # Remove nans from arrays. + predictors = predictors[~np.isnan(targets)] + targets = targets[~np.isnan(targets)] + eqn = self.create_pygam_model() gam = GAM( eqn, diff --git a/improver/utilities/mathematical_operations.py b/improver/utilities/mathematical_operations.py index b80561b59e..17ce5ad569 100644 --- a/improver/utilities/mathematical_operations.py +++ b/improver/utilities/mathematical_operations.py @@ -18,9 +18,7 @@ create_new_diagnostic_cube, generate_mandatory_attributes, ) -from improver.utilities.cube_checker import ( - assert_spatial_coords_match, -) +from improver.utilities.cube_checker import assert_spatial_coords_match from improver.utilities.cube_manipulation import ( enforce_coordinate_ordering, get_dim_coord_names, @@ -468,8 +466,10 @@ def calculate_anomalies( ) -> ndarray: """Calculate climate anomalies from the input cubes.""" anomalies_data = diagnostic_cube.data - mean_cube.data + if std_cube: anomalies_data = anomalies_data / std_cube.data + return anomalies_data @staticmethod @@ -556,6 +556,11 @@ def _create_output_cube( self._add_reference_epoch_metadata(output_cube, mean_cube) anomalies_data = self.calculate_anomalies(diagnostic_cube, mean_cube, std_cube) + # Ensure any mask from the input cube is copied over to the output cube. + try: + anomalies_data.mask = output_cube.data.mask + except AttributeError: + pass output_cube.data = anomalies_data diff --git a/improver_tests/acceptance/SHA256SUMS b/improver_tests/acceptance/SHA256SUMS index f59d51ecb7..0cd3cf06b9 100644 --- a/improver_tests/acceptance/SHA256SUMS +++ b/improver_tests/acceptance/SHA256SUMS @@ -132,6 +132,22 @@ a82a48e690514f82b77cf54d70df10166c56ce1ab977ee5217348ac74d3293c9 ./apply-reliab cf666c51a6355d050406e5c7d15daca1e13d415be05a0cc92429d458474546fc ./apply-reliability-calibration/point_by_point/cubelist_table_point_by_point.nc 06681263dcb0538d844205937637e186352edd2a362b5f4ea5336bc67852acc2 ./apply-reliability-calibration/point_by_point/forecast_point_by_point.nc 30c0807e00af53babe747d04ec84ab5a71c977948c89aeb16d13b79740eb409b ./apply-reliability-calibration/point_by_point/kgo_point_by_point.nc +ff3a00a16fa94697d6e529a6f98384e4602f70a7d6ce26669c3947c8ac2e6f7e ./apply-samos-coefficients/additional_features/landmask.nc +8964d00ec69f384501269e0093bb7c4684d1fd7cf292a8286ee4ea68fe3b73e4 ./apply-samos-coefficients/additional_features/roughness_length.nc +a809cb7eb51c5cc35403e0f9d357a6bdf11431bb05ac6ba9bc10d58abe636d41 ./apply-samos-coefficients/forecast.nc +7c0268d6117de9002033a72a0388c963094c0f87b2448c2455802e41750960e0 ./apply-samos-coefficients/gam_configs/samos_gam.pkl +a979ae8ee62d3e7867ac6ed31a423c03bd65fc52f818d7e24ad212f789352c61 ./apply-samos-coefficients/gam_configs/samos_gam_additional_features.pkl +33ae30a74bc600b0f4ff10feff1ae3443323f1c17e57ec6c2f0aec4cb0e72ae5 ./apply-samos-coefficients/gam_configs/samos_gam_sites.pkl +6802f0378f76fba1c8bf44f7e90d20052048de851873862a80f09883ad5aaaff ./apply-samos-coefficients/kgo_coord.nc +8b1f4642c692a0811dc4d6a6d95ed552a8024bee017b9cc3e2e870410d38ddd3 
./apply-samos-coefficients/kgo_cubes_additional_cubes.nc +b0abca332d83f8df653711c6e431cd8f954dac987bb7d6acdabf3e05193b3f20 ./apply-samos-coefficients/kgo_cubes_emos_and_gam_features.nc +b5cd51b07b1daa7338dee0ad0d3eff626df4c1d692fbb4c35c75dc14f6ab753e ./apply-samos-coefficients/kgo_sites.nc +a3cbc9e1124dae85f911ecb338408a992bb3af0e6459a912d2296f8fc1d219c1 ./apply-samos-coefficients/kgo_with_comment.nc +1af4c8f931ec0b2fd53d9e7225abac7cec9039e651c3956c8289e1d5490d6645 ./apply-samos-coefficients/samos_coefficients/coefficients_coordinates.nc +e8e9661759ebf7cf8e78d259a33a57ba8a5f055a013bda68daf2f265d28de367 ./apply-samos-coefficients/samos_coefficients/coefficients_emos_and_gam_features.nc +17923bf4ed5251a4d4e600084365bbe75f2f0c91c4aadf1893b7392460161fee ./apply-samos-coefficients/samos_coefficients/coefficients_extra_features.nc +1a93dfcda78232e4302601c67b0e1c24c5f7e8fd0a7b55d8be6c52360ca7010d ./apply-samos-coefficients/samos_coefficients/coefficients_sites.nc +45aad45ea94eb663e9344c7fd93e08df3491418bf1d7f3f5be32ef5569bb81e6 ./apply-samos-coefficients/site_forecast.nc cf66fdbefec74d058e131a31dd1f51e4d10595265d21680272eb9590c3cc2fc2 ./between-thresholds/input.nc d40ce0bdf404f36afdeb7b23860da08cb93fa91766ba25b12c1b4ec99a41ca7d ./between-thresholds/kgo.nc a938495a85d1b9488e57e54c26d9120eb6bca016d9cc353967cfa347812e4fdc ./between-thresholds/threshold_ranges_km.json @@ -420,6 +436,33 @@ cdcbaebb219f4be7775f26d7d6fd8384002d1e46320d7f8bc086ea39c7913652 ./estimate-emo 397b0e3712041863f9d8ac45480b67162f6e740cf9b7d2e1521a6844074146e8 ./estimate-emos-coefficients/truncated_normal/truth/20170602T1500Z-PT0000H-horizontal_wind_speed_at_10m.nc 1032b6e41987c08ad4caace25e182a02701177408461c7b573f3e8b304b31c16 ./estimate-emos-coefficients/truncated_normal/truth/20170603T1500Z-PT0000H-horizontal_wind_speed_at_10m.nc fe84db49d69ea62e2e82312c7b3d860c7329c627d79d76c2543d365f69d2e0d8 ./estimate-emos-coefficients/truncated_normal/truth/20170604T1500Z-PT0000H-horizontal_wind_speed_at_10m.nc +cc3fb6e27313a487f17a69ca223523e86f3fa6c7c71f08574d2efcecab5bc4a2 ./estimate-samos-coefficients-from-table/distance_to_water.nc +a67d9606590ecafd2099f5491e683ec722b574c9bc2a2d1105ecc2eba8ed7bc8 ./estimate-samos-coefficients-from-table/gam_coordinates.pkl +8811643fb3b05232e8dfd2bac852f986c7b6f99989946a26f92630d5a588e834 ./estimate-samos-coefficients-from-table/gam_cubes.pkl +d340bbb94fbaa1f975b88ef41e9d8e3dfda54d3935d4c84257cde7bb18d72856 ./estimate-samos-coefficients-from-table/kgo_coordinates.nc +00ef721539ca3754638a032a42f6723e55981f6b4acdf3076ba0648551b6fecf ./estimate-samos-coefficients-from-table/kgo_gam_and_emos_cube.nc +ba78790fdabba784498c3c9b000f0f9c67664e7d7a3367780f67bce2f5858ea3 ./estimate-samos-coefficients-from-table/kgo_gam_cube.nc +0938d182a496e8708301dd19bf715402aceaa0a2da32915c320b22ef30933d1f ./estimate-samos-coefficients-from-table/land_fraction.nc +ff3a00a16fa94697d6e529a6f98384e4602f70a7d6ce26669c3947c8ac2e6f7e ./estimate-samos-coefficients/additional_features/landmask.nc +8964d00ec69f384501269e0093bb7c4684d1fd7cf292a8286ee4ea68fe3b73e4 ./estimate-samos-coefficients/additional_features/roughness_length.nc +7c0268d6117de9002033a72a0388c963094c0f87b2448c2455802e41750960e0 ./estimate-samos-coefficients/gam_configs/samos_gam.pkl +a979ae8ee62d3e7867ac6ed31a423c03bd65fc52f818d7e24ad212f789352c61 ./estimate-samos-coefficients/gam_configs/samos_gam_additional_features.pkl +33ae30a74bc600b0f4ff10feff1ae3443323f1c17e57ec6c2f0aec4cb0e72ae5 ./estimate-samos-coefficients/gam_configs/samos_gam_sites.pkl 
+85f86c253b3d7047b3078761992690251629c549dd22b9fde87fbf604bd2afa6 ./estimate-samos-coefficients/kgo_coordinates.nc
+12d5d92e469d01943123f7c7b5afb944bf7613b65431c44cdaa22277f70d1a7b ./estimate-samos-coefficients/kgo_extra_gam_feature.nc
+e8e9661759ebf7cf8e78d259a33a57ba8a5f055a013bda68daf2f265d28de367 ./estimate-samos-coefficients/kgo_gam_and_emos.nc
+7d89c1a0f1dd64e9c6c0c406f0e2a47a9282778cbe688b363170b84b4f3e87cd ./estimate-samos-coefficients/kgo_sites.nc
+7c0268d6117de9002033a72a0388c963094c0f87b2448c2455802e41750960e0 ./estimate-samos-gam/kgo.pkl
+a979ae8ee62d3e7867ac6ed31a423c03bd65fc52f818d7e24ad212f789352c61 ./estimate-samos-gam/kgo_extra_cube.pkl
+33ae30a74bc600b0f4ff10feff1ae3443323f1c17e57ec6c2f0aec4cb0e72ae5 ./estimate-samos-gam/kgo_sites.pkl
+3bfb4f1b610528f34096745613fc62db3ca10b7f38c54b2495996a9b2cdfc695 ./estimate-samos-gam/output.pkl
+8964d00ec69f384501269e0093bb7c4684d1fd7cf292a8286ee4ea68fe3b73e4 ./estimate-samos-gam/roughness_length.nc
+34222261b96e60ffd281637d5876cf84664a59efc9977407cb3a7de30c99b0c9 ./estimate-samos-gam/samos_model_spec_simple.json
+cc3fb6e27313a487f17a69ca223523e86f3fa6c7c71f08574d2efcecab5bc4a2 ./estimate-samos-gams-from-table/distance_to_water.nc
+a67d9606590ecafd2099f5491e683ec722b574c9bc2a2d1105ecc2eba8ed7bc8 ./estimate-samos-gams-from-table/kgo_coords.pkl
+8811643fb3b05232e8dfd2bac852f986c7b6f99989946a26f92630d5a588e834 ./estimate-samos-gams-from-table/kgo_cubes.pkl
+3ad45b58562c2c654bc988e3eccd55c608a312b615acd1297d569dbef42cbfb9 ./estimate-samos-gams-from-table/roughness.nc
+34222261b96e60ffd281637d5876cf84664a59efc9977407cb3a7de30c99b0c9 ./estimate-samos-gams-from-table/samos_model_spec_simple.json
 386284bf4c5daa3567fb78ca00331494c99ac664902490188dfdf98c39a494ba ./expected-value/deterministic.nc
 64bcba24bc314d1ffc6bb7adbae6b9e6d872cb75e8ef834463e4fb72e55ad16a ./expected-value/kgo.nc
 f61aa0326219c7e3934823bc4d607c2b3ec022a7c1df15d3519aea91207effce ./expected-value/percentile.nc
diff --git a/improver_tests/acceptance/acceptance.py b/improver_tests/acceptance/acceptance.py
index 4ae0efc049..35a39b28ff 100644
--- a/improver_tests/acceptance/acceptance.py
+++ b/improver_tests/acceptance/acceptance.py
@@ -15,7 +15,11 @@
 from improver import cli
 from improver.constants import DEFAULT_TOLERANCE
-from improver.utilities.compare import compare_netcdfs, compare_pickled_forest
+from improver.utilities.compare import (
+    compare_netcdfs,
+    compare_objects,
+    compare_pickled_forest,
+)
 
 RECREATE_DIR_ENVVAR = "RECREATE_KGO"
 ACC_TEST_DIR_ENVVAR = "IMPROVER_ACC_TEST_DIR"
@@ -320,8 +324,11 @@ def compare(
         rtol (float): Relative tolerance
         exclude_vars (Iterable[str]): Variables to exclude from comparison
         exclude_attributes (Iterable[str]): Attributes to exclude from comparison
-        file_type (str): Name of file type to compare, either "netCDF" or
-            "pickled_forest".
+        file_type (str): Name of file type to compare. One of "netCDF",
+            "pickled_forest", or "generic_pickle".
+
+    Raises:
+        ValueError: if file_type is not recognised.
Returns: None @@ -371,6 +378,11 @@ def message_recorder(exception_message): kgo_path, reporter=message_recorder, ) + elif file_type == "generic_pickle": + compare_objects(output_path, kgo_path, reporter=message_recorder) + else: + msg = f"There is no support for comparing the file_type provided: {file_type}" + raise ValueError(msg) if difference_found: if recreate: diff --git a/improver_tests/acceptance/test_apply_samos_coefficients.py b/improver_tests/acceptance/test_apply_samos_coefficients.py new file mode 100644 index 0000000000..af31b637bc --- /dev/null +++ b/improver_tests/acceptance/test_apply_samos_coefficients.py @@ -0,0 +1,149 @@ +# (C) Crown Copyright, Met Office. All rights reserved. +# +# This file is part of 'IMPROVER' and is released under the BSD 3-Clause license. +# See LICENSE in the root of the repository for full licensing details. +"""Tests for the apply-samos-coefficients CLI""" + +import pytest + +from improver.constants import LOOSE_TOLERANCE + +from . import acceptance as acc + +pytestmark = [pytest.mark.acc, acc.skip_if_kgo_missing] +CLI = acc.cli_name_with_dashes(__file__) +run_cli = acc.run_cli(CLI) + + +def test_normal_gam_coordinates(tmp_path): + """Test diagnostic with assumed normal distribution and GAM feature + provided as coordinates on the input cube.""" + kgo_dir = acc.kgo_root() / "apply-samos-coefficients" + kgo_path = kgo_dir / "kgo_coord.nc" + + input_path = kgo_dir / "forecast.nc" + samos_est_path = kgo_dir / "samos_coefficients/coefficients_coordinates.nc" + gam_path = kgo_dir / "gam_configs/samos_gam.pkl" + output_path = tmp_path / "output.nc" + + gam_features = "projection_y_coordinate,projection_x_coordinate,height" + + args = [ + input_path, + samos_est_path, + gam_path, + "--gam-features", + gam_features, + "--realizations-count", + "9", + "--random-seed", + "0", + "--output", + output_path, + ] + run_cli(args) + acc.compare(output_path, kgo_path, atol=LOOSE_TOLERANCE) + + +def test_normal_gam_cubes(tmp_path): + """ + Test apply-samos-coefficients for diagnostic with assumed + normal distribution and additional features provided as cubes. + """ + kgo_dir = acc.kgo_root() / "apply-samos-coefficients" + kgo_path = kgo_dir / "kgo_cubes_additional_cubes.nc" + + input_path = kgo_dir / "forecast.nc" + samos_est_path = kgo_dir / "samos_coefficients/coefficients_extra_features.nc" + gam_path = kgo_dir / "gam_configs/samos_gam_additional_features.pkl" + output_path = tmp_path / "output.nc" + additional_features_path = kgo_dir / "additional_features/roughness_length.nc" + gam_features = ( + "projection_y_coordinate,projection_x_coordinate,vegetative_roughness_length" + ) + + args = [ + input_path, + samos_est_path, + additional_features_path, + gam_path, + "--gam-features", + gam_features, + "--realizations-count", + "9", + "--random-seed", + "0", + "--output", + output_path, + ] + run_cli(args) + acc.compare(output_path, kgo_path, atol=LOOSE_TOLERANCE) + + +def test_normal_sites(tmp_path): + """ + Test apply-samos-coefficients for diagnostic with assumed + normal distribution at sites. 
+    """
+    kgo_dir = acc.kgo_root() / "apply-samos-coefficients"
+    kgo_path = kgo_dir / "kgo_sites.nc"
+
+    input_path = kgo_dir / "site_forecast.nc"
+    samos_est_path = kgo_dir / "samos_coefficients/coefficients_sites.nc"
+    gam_path = kgo_dir / "gam_configs/samos_gam_sites.pkl"
+    output_path = tmp_path / "output.nc"
+    gam_features = "latitude,longitude,height"
+
+    args = [
+        input_path,
+        samos_est_path,
+        gam_path,
+        "--gam-features",
+        gam_features,
+        "--realizations-count",
+        "9",
+        "--random-seed",
+        "0",
+        "--output",
+        output_path,
+    ]
+    run_cli(args)
+    acc.compare(output_path, kgo_path, atol=LOOSE_TOLERANCE)
+
+
+def test_no_coefficients(tmp_path):
+    """Test apply-samos-coefficients when no coefficients provided"""
+    kgo_dir = acc.kgo_root() / "apply-samos-coefficients/"
+    kgo_path = kgo_dir / "kgo_with_comment.nc"
+
+    input_path = kgo_dir / "site_forecast.nc"
+    gam_path = kgo_dir / "gam_configs/samos_gam_sites.pkl"
+    output_path = tmp_path / "output.nc"
+    additional_features_path = kgo_dir / "additional_features/roughness_length.nc"
+    gam_features = (
+        "projection_y_coordinate,projection_x_coordinate,vegetative_roughness_length"
+    )
+
+    args = [
+        input_path,
+        additional_features_path,
+        gam_path,
+        "--gam-features",
+        gam_features,
+        "--realizations-count",
+        "9",
+        "--random-seed",
+        "0",
+        "--output",
+        output_path,
+    ]
+
+    with pytest.warns(UserWarning, match=".*no coefficients provided.*"):
+        run_cli(args)
+
+    # Check output matches kgo.
+    acc.compare(output_path, kgo_path, atol=LOOSE_TOLERANCE)
+    # Check output only differs from input due to addition of a comment.
+    acc.compare(
+        output_path, input_path, atol=LOOSE_TOLERANCE, exclude_attributes=["comment"]
+    )
diff --git a/improver_tests/acceptance/test_estimate_samos_coefficients.py b/improver_tests/acceptance/test_estimate_samos_coefficients.py
new file mode 100644
index 0000000000..8b2a431e5b
--- /dev/null
+++ b/improver_tests/acceptance/test_estimate_samos_coefficients.py
@@ -0,0 +1,140 @@
+# (C) Crown Copyright, Met Office. All rights reserved.
+#
+# This file is part of 'IMPROVER' and is released under the BSD 3-Clause license.
+# See LICENSE in the root of the repository for full licensing details.
+"""
+Tests for the estimate-samos-coefficients CLI
+
+Many of these tests use globs which are expanded by IMPROVER code itself,
+rather than by shell glob expansion. There are also some directory globs
+which expand directory names in addition to filenames.
+"""
+
+import pytest
+
+from . import acceptance as acc
+
+pytestmark = [pytest.mark.acc, acc.skip_if_kgo_missing]
+CLI = acc.cli_name_with_dashes(__file__)
+run_cli = acc.run_cli(CLI)
+
+pytest.importorskip("statsmodels")
+
+# The EMOS estimation tolerance is defined in units of the variable being
+# calibrated - not in terms of the EMOS coefficients produced by
+# estimate-emos-coefficients and compared against KGOs here.
+# See comments and CLI help messages in
+# improver/cli/estimate_emos_coefficients.py for more detail.
+EST_EMOS_TOLERANCE = 1e-4
+
+# The EMOS coefficients are expected to vary by at most one order of magnitude
+# more than the CRPS tolerance specified.
+COMPARE_EMOS_TOLERANCE = EST_EMOS_TOLERANCE * 10
+
+# Pre-convert to string for easier use in each test
+EST_EMOS_TOL = str(EST_EMOS_TOLERANCE)
+
+
+@pytest.mark.slow
+def test_coordinates(tmp_path):
+    """
+    Test estimate-samos-coefficients for a diagnostic with an assumed
+    normal distribution, where additional GAM features are coordinates on
+    the input cubes.
+    """
+    # The source data is from the estimate-emos-coefficients acceptance tests
+    source_emos_dir = acc.kgo_root() / "estimate-emos-coefficients/normal"
+    history_path = source_emos_dir / "history/*.nc"
+    truth_path = source_emos_dir / "truth/*.nc"
+
+    kgo_dir = acc.kgo_root() / "estimate-samos-coefficients"
+    output_path = tmp_path / "output.nc"
+    kgo_path = kgo_dir / "kgo_coordinates.nc"
+    gam_path = kgo_dir / "gam_configs/samos_gam.pkl"
+    gam_features = "projection_y_coordinate,projection_x_coordinate,height"
+
+    args = [
+        history_path,
+        truth_path,
+        gam_path,
+        "--truth-attribute",
+        "mosg__model_configuration=uk_det",
+        "--gam-features",
+        gam_features,
+        "--output",
+        output_path,
+    ]
+    run_cli(args)
+    acc.compare(
+        output_path, kgo_path, atol=COMPARE_EMOS_TOLERANCE, rtol=COMPARE_EMOS_TOLERANCE
+    )
+
+
+def test_normal_cube_gam_features(tmp_path):
+    """
+    Test estimate-samos-coefficients for a diagnostic with an assumed
+    normal distribution and additional features provided as a cube.
+    """
+    source_emos_dir = acc.kgo_root() / "estimate-emos-coefficients/normal"
+    history_path = source_emos_dir / "history/*.nc"
+    truth_path = source_emos_dir / "truth/*.nc"
+
+    kgo_dir = acc.kgo_root() / "estimate-samos-coefficients"
+    additional_features_path = kgo_dir / "additional_features/roughness_length.nc"
+    output_path = tmp_path / "output.nc"
+    kgo_path = kgo_dir / "kgo_extra_gam_feature.nc"
+    gam_path = kgo_dir / "gam_configs/samos_gam_additional_features.pkl"
+    gam_features = (
+        "projection_y_coordinate,projection_x_coordinate,vegetative_roughness_length"
+    )
+
+    args = [
+        history_path,
+        truth_path,
+        additional_features_path,
+        gam_path,
+        "--truth-attribute",
+        "mosg__model_configuration=uk_det",
+        "--gam-features",
+        gam_features,
+        "--output",
+        output_path,
+    ]
+    run_cli(args)
+    acc.compare(
+        output_path, kgo_path, atol=COMPARE_EMOS_TOLERANCE, rtol=COMPARE_EMOS_TOLERANCE
+    )
+
+
+def test_estimate_samos_coefficients_sites(tmp_path):
+    """
+    Test estimate-samos-coefficients for site data with a diagnostic with an
+    assumed normal distribution, where additional GAM features are coordinates
+    on the input cubes.
+    """
+    # The source data is from the estimate-emos-coefficients acceptance tests
+    source_emos_dir = acc.kgo_root() / "estimate-emos-coefficients/normal/sites"
+    history_path = source_emos_dir / "history/*.nc"
+    truth_path = source_emos_dir / "truth/*.nc"
+
+    kgo_dir = acc.kgo_root() / "estimate-samos-coefficients"
+    output_path = tmp_path / "output.nc"
+    kgo_path = kgo_dir / "kgo_sites.nc"
+    gam_path = kgo_dir / "gam_configs/samos_gam_sites.pkl"
+    gam_features = "latitude,longitude,height"
+
+    args = [
+        history_path,
+        truth_path,
+        gam_path,
+        "--truth-attribute",
+        "mosg__model_configuration=uk_det",
+        "--gam-features",
+        gam_features,
+        "--output",
+        output_path,
+    ]
+    run_cli(args)
+    acc.compare(
+        output_path, kgo_path, atol=COMPARE_EMOS_TOLERANCE, rtol=COMPARE_EMOS_TOLERANCE
+    )
diff --git a/improver_tests/acceptance/test_estimate_samos_coefficients_from_table.py b/improver_tests/acceptance/test_estimate_samos_coefficients_from_table.py
new file mode 100644
index 0000000000..6e6a4536f1
--- /dev/null
+++ b/improver_tests/acceptance/test_estimate_samos_coefficients_from_table.py
@@ -0,0 +1,147 @@
+# (C) Crown Copyright, Met Office. All rights reserved.
+#
+# This file is part of 'IMPROVER' and is released under the BSD 3-Clause license.
+# See LICENSE in the root of the repository for full licensing details.
+""" +Tests for the estimate-samos-coefficients-from-table CLI +""" + +import pytest + +from . import acceptance as acc + +pytestmark = [pytest.mark.acc, acc.skip_if_kgo_missing] +CLI = acc.cli_name_with_dashes(__file__) +run_cli = acc.run_cli(CLI) + +for mod in ["pyarrow", "statsmodels"]: + pytest.importorskip(mod) + +# The EMOS estimation tolerance is defined in units of the variable being +# calibrated - not in terms of the EMOS coefficients produced by +# estimate-emos-coefficients and compared against KGOs here. +# See comments and CLI help messages in +# improver/cli/estimate_emos_coefficients.py for more detail. +EST_EMOS_TOLERANCE = 1e-4 + +# The EMOS coefficients are expected to vary by at most one order of magnitude +# more than the CRPS tolerance specified. +COMPARE_EMOS_TOLERANCE = EST_EMOS_TOLERANCE * 10 + +# Pre-convert to string for easier use in each test +EST_EMOS_TOL = str(EST_EMOS_TOLERANCE) + + +@pytest.mark.slow +def test_additional_features_coords(tmp_path): + """ + Test estimate-samos-coefficients-from-table with an example forecast and truth + table for screen temperature.Extra features for the GAMs are provided + as coordinates on the forecast and truth cubes. + """ + source_dir = acc.kgo_root() / "estimate-emos-coefficients-from-table/" + history_path = source_dir / "forecast_table" + truth_path = source_dir / "truth_table" + + kgo_dir = acc.kgo_root() / "estimate-samos-coefficients-from-table/" + kgo_path = kgo_dir / "kgo_coordinates.nc" + output_path = tmp_path / "output.nc" + + gam_config = kgo_dir / "gam_coordinates.pkl" + + compulsory_args = [history_path, truth_path, gam_config] + named_args = [ + "--gam-features", + "latitude,longitude,altitude", + "--percentiles", + "10,20,30,40,50,60,70,80,90", + "--forecast-period", + "86400", + "--training-length", + "5", + "--diagnostic", + "temperature_at_screen_level", + "--cycletime", + "20210805T2100Z", + "--output", + output_path, + ] + + run_cli(compulsory_args + named_args) + acc.compare( + output_path, kgo_path, atol=COMPARE_EMOS_TOLERANCE, rtol=COMPARE_EMOS_TOLERANCE + ) + + +@pytest.mark.slow +def test_additional_gam_features_cube(tmp_path): + """ + Test estimate-samos-coefficients-from-table with an example forecast and truth + table for screen temperature. Extra features for the GAMs are provided + as a cube. + """ + source_dir = acc.kgo_root() / "estimate-emos-coefficients-from-table/" + history_path = source_dir / "forecast_table" + truth_path = source_dir / "truth_table" + + kgo_dir = acc.kgo_root() / "estimate-samos-coefficients-from-table/" + kgo_path = kgo_dir / "kgo_gam_cube.nc" + output_path = tmp_path / "output.nc" + gam_additional_features = kgo_dir / "distance_to_water.nc" + gam_config = kgo_dir / "gam_coordinates.pkl" + + compulsory_args = [history_path, truth_path, gam_config, gam_additional_features] + named_args = [ + "--gam-features", + "latitude,longitude,distance_to_water", + "--percentiles", + "10,20,30,40,50,60,70,80,90", + "--forecast-period", + "86400", + "--training-length", + "5", + "--diagnostic", + "temperature_at_screen_level", + "--cycletime", + "20210805T2100Z", + "--output", + output_path, + ] + + run_cli(compulsory_args + named_args) + acc.compare( + output_path, kgo_path, atol=COMPARE_EMOS_TOLERANCE, rtol=COMPARE_EMOS_TOLERANCE + ) + + +@pytest.mark.slow +def test_return_none(tmp_path): + """ + Test that None is returned if cube cannot be created from table. 
+ """ + source_dir = acc.kgo_root() / "estimate-emos-coefficients-from-table/" + history_path = source_dir / "forecast_table_quantiles" + truth_path = source_dir / "truth_table" + + kgo_dir = acc.kgo_root() / "estimate-samos-coefficients-from-table/" + output_path = tmp_path / "output.nc" + gam_config = kgo_dir / "gam_coordinates.pkl" + + compulsory_args = [history_path, truth_path, gam_config] + named_args = [ + "--gam-features", + "projection_y_coordinate,projection_x_coordinate,height", + "--percentiles", + "10,20,30,40,50,60,70,80,90", + "--forecast-period", + "3600000", + "--training-length", + "5", + "--diagnostic", + "temperature_at_screen_level", + "--cycletime", + "20210805T2100Z", + "--output", + output_path, + ] + assert run_cli(compulsory_args + named_args) is None diff --git a/improver_tests/acceptance/test_estimate_samos_gams.py b/improver_tests/acceptance/test_estimate_samos_gams.py new file mode 100644 index 0000000000..05b084ae90 --- /dev/null +++ b/improver_tests/acceptance/test_estimate_samos_gams.py @@ -0,0 +1,142 @@ +# (C) Crown Copyright, Met Office. All rights reserved. +# +# This file is part of 'IMPROVER' and is released under the BSD 3-Clause license. +# See LICENSE in the root of the repository for full licensing details. +""" +Tests for the estimate-samos-gams CLI +""" + +import pytest + +from . import acceptance as acc + +pytestmark = [pytest.mark.acc, acc.skip_if_kgo_missing] +CLI = acc.cli_name_with_dashes(__file__) +run_cli = acc.run_cli(CLI) + +pytest.importorskip("statsmodels") + +TOLERANCE = str(1e-4) + + +@pytest.mark.slow +def test_gam_features_on_cube(tmp_path): + """ + Test estimate-samos-gams-coefficients for diagnostic with assumed + normal distribution. + """ + source_emos_dir = acc.kgo_root() / "estimate-emos-coefficients/normal" + history_path = source_emos_dir / "history/*.nc" + truth_path = source_emos_dir / "truth/*.nc" + + kgo_dir = acc.kgo_root() / "estimate-samos-gam" + kgo_path = kgo_dir / "kgo.pkl" + model_specification_path = kgo_dir / "samos_model_spec_simple.json" + output_path = tmp_path / "output.pkl" + + gam_features = "projection_y_coordinate,projection_x_coordinate,height" + args = [ + history_path, + truth_path, + "--distribution", + "normal", + "--truth-attribute", + "mosg__model_configuration=uk_det", + "--tolerance", + TOLERANCE, + "--gam-features", + gam_features, + "--model-specification", + model_specification_path, + "--output", + output_path, + ] + run_cli(args) + + # Compare the output with the known good output. This + # comparison only ensures that the string version of the + # pickled objects are the same, not the actual objects as + # there is no function to compare the GAM class objects. + acc.compare(output_path, kgo_path, file_type="generic_pickle") + + +def test_gam_cube_gam_features(tmp_path): + """ + Test estimate-samos-gams-coefficients for diagnostic with assumed + normal distribution and additional features provided as a cube. 
+ """ + source_emos_dir = acc.kgo_root() / "estimate-emos-coefficients/normal" + history_path = source_emos_dir / "history/*.nc" + truth_path = source_emos_dir / "truth/*.nc" + + kgo_dir = acc.kgo_root() / "estimate-samos-gam" + kgo_path = kgo_dir / "kgo.pkl" + additional_features_path = kgo_dir / "roughness_length.nc" + model_specification_path = kgo_dir / "samos_model_spec_simple.json" + output_path = tmp_path / "output.pkl" + + gam_features = ( + "projection_x_coordinate,projection_y_coordinate,vegetative_roughness_length" + ) + args = [ + history_path, + truth_path, + additional_features_path, + "--distribution", + "normal", + "--truth-attribute", + "mosg__model_configuration=uk_det", + "--tolerance", + TOLERANCE, + "--gam-features", + gam_features, + "--model-specification", + model_specification_path, + "--output", + output_path, + ] + run_cli(args) + # Compare the output with the known good output. This + # comparison only ensures that the string version of the + # pickled objects are the same, not the actual objects as + # there is no function to compare the GAM class objects. + acc.compare(output_path, kgo_path, file_type="generic_pickle") + + +def test_gam_at_sites(): + """ + Test estimate-samos-gams-coefficients for diagnostic with assumed + normal distribution and additional features provided as a cube. + """ + source_emos_dir = acc.kgo_root() / "estimate-emos-coefficients/normal/sites" + history_path = source_emos_dir / "history/*.nc" + truth_path = source_emos_dir / "truth/*.nc" + + kgo_dir = acc.kgo_root() / "estimate-samos-gam" + kgo_path = kgo_dir / "kgo_sites.pkl" + model_specification_path = kgo_dir / "samos_model_spec_simple.json" + output_path = kgo_dir / "output.pkl" + + gam_features = "latitude,longitude,height" + args = [ + history_path, + truth_path, + "--distribution", + "normal", + "--truth-attribute", + "mosg__model_configuration=uk_det", + "--tolerance", + TOLERANCE, + "--gam-features", + gam_features, + "--model-specification", + model_specification_path, + "--output", + output_path, + ] + run_cli(args) + # Compare the output with the known good output. This + # comparison only ensures that the string version of the + # pickled objects are the same, not the actual objects as + # there is no function to compare the GAM class objects. + acc.compare(output_path, kgo_path, file_type="generic_pickle") diff --git a/improver_tests/acceptance/test_estimate_samos_gams_from_table.py b/improver_tests/acceptance/test_estimate_samos_gams_from_table.py new file mode 100644 index 0000000000..928cef52af --- /dev/null +++ b/improver_tests/acceptance/test_estimate_samos_gams_from_table.py @@ -0,0 +1,169 @@ +# (C) Crown Copyright, Met Office. All rights reserved. +# +# This file is part of 'IMPROVER' and is released under the BSD 3-Clause license. +# See LICENSE in the root of the repository for full licensing details. +""" +Tests for the estimate-samos-gams-from-table CLI + +""" + +import pytest + +from . import acceptance as acc + +pytestmark = [pytest.mark.acc, acc.skip_if_kgo_missing] +CLI = acc.cli_name_with_dashes(__file__) +run_cli = acc.run_cli(CLI) + +for mod in ["pyarrow", "statsmodels"]: + pytest.importorskip(mod) + + +@pytest.mark.slow +def test_additional_features_coords( + tmp_path, +): + """ + Test estimate-samos-gams-from-table with an example forecast and truth + table for screen temperature. Extra features for the GAMs are provided + as coordinates on the forecast and truth cubes. 
+ """ + source_dir = acc.kgo_root() / "estimate-emos-coefficients-from-table/" + history_path = source_dir / "forecast_table" + truth_path = source_dir / "truth_table" + + kgo_dir = acc.kgo_root() / "estimate-samos-gams-from-table/" + kgo_path = kgo_dir / "kgo_coords.pkl" + + output_path = tmp_path / "output.pkl" + compulsory_args = [history_path, truth_path] + named_args = [ + "--diagnostic", + "temperature_at_screen_level", + "--cycletime", + "20210805T2100Z", + "--forecast-period", + "86400", + "--training-length", + "5", + "--distribution", + "normal", + "--tolerance", + "1e-4", + "--gam-features", + "latitude,longitude,altitude", + "--model-specification", + kgo_dir / "samos_model_spec_simple.json", + "--percentiles", + "10,20,30,40,50,60,70,80,90", + "--output", + output_path, + ] + + run_cli(compulsory_args + named_args) + # Compare the output with the known good output. This + # comparison only ensures that the string version of the + # pickled objects are the same, not the actual objects as + # there is no function to compare the GAM class objects. + acc.compare(output_path, kgo_path, file_type="generic_pickle") + + +@pytest.mark.slow +def test_additional_features_cube( + tmp_path, +): + """ + Test estimate-samos-gams-from-table with an example forecast and truth + table for screen temperature. Extra features for the GAMs are provided + as a cube. + """ + source_dir = acc.kgo_root() / "estimate-emos-coefficients-from-table/" + history_path = source_dir / "forecast_table" + truth_path = source_dir / "truth_table" + + kgo_dir = acc.kgo_root() / "estimate-samos-gams-from-table/" + kgo_path = kgo_dir / "kgo_cubes.pkl" + additional_features = kgo_dir / "distance_to_water.nc" + + output_path = tmp_path / "output.pkl" + compulsory_args = [history_path, truth_path, additional_features] + named_args = [ + "--diagnostic", + "temperature_at_screen_level", + "--cycletime", + "20210805T2100Z", + "--forecast-period", + "86400", + "--training-length", + "5", + "--distribution", + "normal", + "--tolerance", + "1e-4", + "--gam-features", + "latitude,longitude,distance_to_water", + "--model-specification", + kgo_dir / "samos_model_spec_simple.json", + "--percentiles", + "10,20,30,40,50,60,70,80,90", + "--output", + output_path, + ] + + run_cli(compulsory_args + named_args) + # Compare the output with the known good output. This + # comparison only ensures that the string version of the + # pickled objects are the same, not the actual objects as + # there is no function to compare the GAM class objects. + acc.compare(output_path, kgo_path, file_type="generic_pickle") + + +@pytest.mark.slow +def test_additional_features_cubes( + tmp_path, +): + """ + Test estimate-samos-gams-from-table with an example forecast and truth + table for screen temperature. Extra features for the GAMs are provided + by multiple cubes. This test demonstrates that the CLI can accept + multiple additional predictor cubes. 
+ """ + source_dir = acc.kgo_root() / "estimate-emos-coefficients-from-table/" + history_path = source_dir / "forecast_table" + truth_path = source_dir / "truth_table" + + kgo_dir = acc.kgo_root() / "estimate-samos-gams-from-table/" + kgo_path = kgo_dir / "kgo_cubes.pkl" + additional_features = [kgo_dir / "distance_to_water.nc", kgo_dir / "roughness.nc"] + + output_path = tmp_path / "output.pkl" + compulsory_args = [history_path, truth_path, *additional_features] + named_args = [ + "--diagnostic", + "temperature_at_screen_level", + "--cycletime", + "20210805T2100Z", + "--forecast-period", + "86400", + "--training-length", + "5", + "--distribution", + "normal", + "--tolerance", + "1e-4", + "--gam-features", + "latitude,longitude,distance_to_water,vegetative_roughness_length", + "--model-specification", + kgo_dir / "samos_model_spec_simple.json", + "--percentiles", + "10,20,30,40,50,60,70,80,90", + "--output", + output_path, + ] + + run_cli(compulsory_args + named_args) + # Compare the output with the known good output. This + # comparison only ensures that the string version of the + # pickled objects are the same, not the actual objects as + # there is no function to compare the GAM class objects. + acc.compare(output_path, kgo_path, file_type="generic_pickle") diff --git a/improver_tests/calibration/samos_calibration/helper_functions.py b/improver_tests/calibration/samos_calibration/helper_functions.py index f224326d20..0a35681b7d 100644 --- a/improver_tests/calibration/samos_calibration/helper_functions.py +++ b/improver_tests/calibration/samos_calibration/helper_functions.py @@ -16,6 +16,12 @@ set_up_variable_cube, ) +FORECAST_ATTRIBUTES = { + "title": "MOGREPS-UK Forecast", + "source": "Met Office Unified Model", + "institution": "Met Office", +} + def create_simple_cube( forecast_type: str, @@ -138,7 +144,7 @@ def create_cubes_for_gam_fitting( # Create array of random noise which increases with x and y, so that there is # some variance in the data to model in the standard deviation GAM. rng = np.random.RandomState(210825) # Set seed for reproducible results. - noise = rng.normal(loc=0.0, scale=addition / 30) + noise = rng.normal(loc=0.0, scale=0.05 + (addition / 30)) input_cube.data = input_cube.data + addition + noise additional_cubes = iris.cube.CubeList([]) diff --git a/improver_tests/calibration/samos_calibration/test_ApplySAMOS.py b/improver_tests/calibration/samos_calibration/test_ApplySAMOS.py new file mode 100644 index 0000000000..f990968d8d --- /dev/null +++ b/improver_tests/calibration/samos_calibration/test_ApplySAMOS.py @@ -0,0 +1,333 @@ +# (C) Crown Copyright, Met Office. All rights reserved. +# +# This file is part of 'IMPROVER' and is released under the BSD 3-Clause license. +# See LICENSE in the root of the repository for full licensing details. 
+"""Unit tests for the ApplySAMOS class within samos_calibration.py""" + +import numpy as np +import pytest +from iris.cube import CubeList + +from improver.calibration.samos_calibration import ( + ApplySAMOS, + TrainGAMsForSAMOS, +) +from improver.threshold import Threshold +from improver_tests.calibration.emos_calibration.test_ApplyEMOS import ( + build_coefficients_cubelist, +) +from improver_tests.calibration.samos_calibration.helper_functions import ( + FORECAST_ATTRIBUTES, + create_cubes_for_gam_fitting, +) + +EXPECTED_DATA_DICT = { + "real_real_emosalt_notgamalt": np.array( + [ + [ + [313.50665, 305.0316, 287.89963], + [280.07907, 287.8345, 296.38815], + [333.15, 333.15, 304.0257], + ], + [ + [313.4974, 304.8424, 288.25708], + [279.9784, 288.0995, 295.84637], + [333.15, 333.15, 302.26242], + ], + [ + [313.50204, 304.65323, 288.61453], + [280.17975, 287.967, 296.11728], + [333.15, 333.15, 303.14404], + ], + ], + dtype=np.float32, + ), + "real_real_notemosalt_notgamalt": np.array( + [ + [ + [273.1705, 280.7735, 287.9103], + [280.7542, 288.1842, 296.4416], + [288.2182, 295.7106, 304.0292], + ], + [ + [273.1613, 280.58432, 288.2678], + [280.65353, 288.44922, 295.8998], + [288.01764, 295.00946, 302.26596], + ], + [ + [273.1659, 280.39514, 288.62524], + [280.8549, 288.3167, 296.17072], + [288.41873, 296.41177, 303.1476], + ], + ], + dtype=np.float32, + ), + "real_real_emosalt_gamalt": np.array( + [ + [ + [313.50665, 305.0316, 287.89963], + [280.07907, 287.8345, 296.38815], + [333.15, 333.15, 304.0257], + ], + [ + [313.4974, 304.8424, 288.25708], + [279.9784, 288.0995, 295.84637], + [333.15, 333.15, 302.26242], + ], + [ + [313.50204, 304.65323, 288.61453], + [280.17975, 287.967, 296.11728], + [333.15, 333.15, 303.14404], + ], + ], + dtype=np.float32, + ), + "real_real_notemosalt_gamalt": np.array( + [ + [ + [273.1705, 280.77353, 287.91034], + [280.7542, 288.18423, 296.4416], + [288.2182, 295.7106, 304.02924], + ], + [ + [273.1613, 280.58432, 288.2678], + [280.65353, 288.44922, 295.8998], + [288.01764, 295.00946, 302.26596], + ], + [ + [273.1659, 280.39514, 288.62524], + [280.8549, 288.3167, 296.17072], + [288.41873, 296.41177, 303.1476], + ], + ], + dtype=np.float32, + ), + "real_prob_emosalt_notgamalt": np.array( + [ + [ + [1.0000, 1.0000, 0.6862], + [0.0000, 0.4333, 1.0000], + [1.0000, 1.0000, 1.0000], + ], + [ + [1.0000, 1.0000, 0.0000], + [0.0000, 0.0000, 0.0000], + [1.0000, 1.0000, 0.5439], + ], + ], + dtype=np.float32, + ), + "prob_prob_notemosalt_notgamalt": np.array( + [ + [ + [0.9772, 1.0000, 0.9974], + [1.0000, 0.9974, 0.9974], + [0.9974, 0.9974, 0.9974], + ], + [ + [0.0228, 0.9772, 0.9918], + [0.9772, 0.9918, 0.9918], + [0.9918, 0.9918, 0.9918], + ], + [ + [0.0000, 0.0228, 0.9772], + [0.0228, 0.9772, 0.9772], + [0.9772, 0.9772, 0.9772], + ], + ], + dtype=np.float32, + ), + "perc_perc_notemosalt_notgamalt": np.array( + [ + [ + [273.1613, 280.39514, 287.91034], + [280.65353, 288.18423, 295.8998], + [288.01764, 295.00946, 302.266], + ], + [ + [273.1659, 280.58432, 288.2678], + [280.7542, 288.3167, 296.17072], + [288.2182, 295.7106, 303.1476], + ], + [ + [273.1705, 280.7735, 288.62524], + [280.8549, 288.44922, 296.44162], + [288.41873, 296.41177, 304.02924], + ], + ], + dtype=np.float32, + ), + "perc_prob_notemosalt_notgamalt": np.array( + [ + [ + [0.0000, 0.0000, 0.6933], + [0.0000, 0.9466, 1.0000], + [0.7685, 1.0000, 1.0000], + ], + [ + [0.0000, 0.0000, 0.0000], + [0.0000, 0.0000, 0.0000], + [0.0000, 0.0000, 0.5450], + ], + ], + dtype=np.float32, + ), + 
"real_prob_notemosalt_notgamalt": np.array( + [ + [ + [0.0000, 0.0000, 0.6933], + [0.0000, 0.9466, 1.0000], + [0.7685, 1.0000, 1.0000], + ], + [ + [0.0000, 0.0000, 0.0000], + [0.0000, 0.0000, 0.0000], + [0.0000, 0.0000, 0.5450], + ], + ], + dtype=np.float32, + ), +} + + +def get_expected_result( + input_format, output_format, emos_include_altitude, gam_include_altitude +): + """Function to get the expected result of the process() method.""" + expected_name = "air_temperature" + if output_format == "probability": + expected_name = "probability_of_air_temperature_above_threshold" + + expected_dims = ( + ["air_temperature", "latitude", "longitude"] + if output_format == "probability" + else [output_format, "latitude", "longitude"] + ) + + emos_field = "emosalt" if emos_include_altitude else "notemosalt" + gam_field = "gamalt" if gam_include_altitude else "notgamalt" + expected_data_key = ( + f"{input_format[:4]}_{output_format[:4]}_{emos_field}_{gam_field}" + ) + try: + expected_data = EXPECTED_DATA_DICT[expected_data_key] + except KeyError: + expected_data = None + + return expected_data, expected_name, expected_dims + + +@pytest.mark.parametrize("percentiles", [None, 5, [5], [5, 10, 15]]) +def test__init__(percentiles): + """Test that the class initializes variables correctly.""" + # Skip test if pyGAM not available. + pytest.importorskip("pygam") + + if percentiles == 5: + msg = "'int' object is not iterable" + with pytest.raises(TypeError, match=msg): + ApplySAMOS(percentiles) + else: + result = ApplySAMOS(percentiles) + assert getattr(result, "percentiles") == percentiles + + +@pytest.mark.parametrize( + "input_format,output_format,emos_include_altitude,gam_include_altitude", + [ + ["realization", "realization", False, False], + ["realization", "realization", True, False], + ["realization", "realization", False, True], + ["realization", "realization", True, True], + ["realization", "probability", False, False], + ["realization", "probability", True, False], + ["percentile", "percentile", False, False], + ["percentile", "probability", False, False], + ["probability", "probability", False, False], + ], +) +def test_process( + input_format, output_format, emos_include_altitude, gam_include_altitude +): + """Test that the process method returns the expected results.""" + # Skip test if pyGAM not available. + pytest.importorskip("pygam") + + # Set up model terms for spatial predictors. 
+ model_specification = [["linear", [0], {}], ["linear", [1], {}]] + features = ["latitude", "longitude"] + n_spatial_points = 3 + n_realizations = 3 + n_times = 20 + + forecast_cube, additional_cubes = create_cubes_for_gam_fitting( + n_spatial_points=n_spatial_points, + n_realizations=n_realizations, + n_times=n_times, + include_altitude=emos_include_altitude, + fixed_forecast_period=True, + ) + + for key, value in FORECAST_ATTRIBUTES.items(): + forecast_cube.attributes[key] = value + + forecast_gams = TrainGAMsForSAMOS(model_specification).process( + forecast_cube, features, additional_cubes + ) + + forecast_slice = next(forecast_cube.slices_over(["forecast_reference_time"])) + if emos_include_altitude: + emos_coefficients = build_coefficients_cubelist( + forecast_slice, + [0, [0.9, 0.1], 0, 1], + CubeList([forecast_slice, additional_cubes[0]]), + ) + else: + emos_coefficients = build_coefficients_cubelist( + forecast_slice, + [0, 1, 0, 1], + CubeList([forecast_slice]), + ) + + if input_format == "realization": + process_kwargs = {} + elif input_format == "percentile": + forecast_slice.coord("realization").rename("percentile") + forecast_slice.coord("percentile").points = np.array([25, 50, 75]) + process_kwargs = { + "realizations_count": n_realizations, + } + elif input_format == "probability": + thresholds = [273, 278, 283] + forecast_slice = Threshold(thresholds, collapse_coord="realization").process( + forecast_slice + ) + process_kwargs = { + "realizations_count": n_realizations, + } + + if output_format == "probability" and not input_format == "probability": + thresholds = [288, 303] + prob_template = Threshold(thresholds, collapse_coord=input_format).process( + forecast_slice + ) + process_kwargs.update({"prob_template": prob_template}) + + expected_data, expected_name, expected_dims = get_expected_result( + input_format, output_format, emos_include_altitude, gam_include_altitude + ) + + result = ApplySAMOS(percentiles=None).process( + forecast=forecast_slice, + forecast_gams=forecast_gams, + truth_gams=forecast_gams, + gam_features=features, + emos_coefficients=emos_coefficients, + gam_additional_fields=None, + emos_additional_fields=additional_cubes, + **process_kwargs, + ) + + np.testing.assert_array_almost_equal(result.data, expected_data, decimal=4) + assert result.name() == expected_name + assert [c.name() for c in result.coords(dim_coords=True)] == expected_dims diff --git a/improver_tests/calibration/samos_calibration/test_TrainEMOSForSAMOS.py b/improver_tests/calibration/samos_calibration/test_TrainEMOSForSAMOS.py index 5ad3332f61..12696a50a2 100644 --- a/improver_tests/calibration/samos_calibration/test_TrainEMOSForSAMOS.py +++ b/improver_tests/calibration/samos_calibration/test_TrainEMOSForSAMOS.py @@ -7,7 +7,10 @@ import numpy as np import pytest -from improver.calibration.samos_calibration import TrainEMOSForSAMOS, TrainGAMsForSAMOS +from improver.calibration.samos_calibration import ( + TrainEMOSForSAMOS, + TrainGAMsForSAMOS, +) from improver_tests.calibration.samos_calibration.helper_functions import ( create_cubes_for_gam_fitting, create_simple_cube, @@ -42,91 +45,6 @@ def test__init__(kwargs): assert getattr(result, key) == kwargs[key] -@pytest.mark.parametrize("include_altitude", [False, True]) -def test_get_climatological_stats( - include_altitude, -): - """Test that the get_climatological_stats method returns the expected results.""" - # Skip test if pyGAM not available. - pytest.importorskip("pygam") - - # Set up model terms for spatial predictors. 
- model_specification = [["linear", [0], {}], ["linear", [1], {}]] - features = ["latitude", "longitude"] - n_spatial_points = 5 - n_realizations = 5 - n_times = 20 - - if include_altitude: - features.append("surface_altitude") - model_specification.append(["spline", [features.index("surface_altitude")], {}]) - - cube_for_gam, additional_cubes_for_gam = create_cubes_for_gam_fitting( - n_spatial_points=n_spatial_points, - n_realizations=n_realizations, - n_times=n_times, - include_altitude=include_altitude, - ) - - gams = TrainGAMsForSAMOS(model_specification).process( - cube_for_gam, features, additional_cubes_for_gam - ) - - cube_for_test, additional_cubes_for_test = create_cubes_for_gam_fitting( - n_spatial_points=2, - n_realizations=2, - n_times=1, - include_altitude=include_altitude, - ) - - result_mean, result_sd = TrainEMOSForSAMOS.get_climatological_stats( - cube_for_test, gams, features, additional_cubes_for_test - ) - - expected_mean = create_simple_cube( - forecast_type="gridded", - n_spatial_points=2, - n_realizations=2, - n_times=1, - fill_value=0.0, - ) - expected_sd = expected_mean.copy() - - if not include_altitude: - expected_mean.data = np.array( - [ - [[284.39363425, 288.14659092], [288.14237183, 291.8953285]], - [[284.39363425, 288.14659092], [288.14237183, 291.8953285]], - ], - dtype=np.float32, - ) - expected_sd.data = np.array( - [ - [[0.36190698, 0.49423461], [0.48704575, 0.61937337]], - [[0.36190698, 0.49423461], [0.48704575, 0.61937337]], - ], - dtype=np.float32, - ) - else: - expected_mean.data = np.array( - [ - [[274.41442381, 288.1640568], [278.16316139, 291.91279438]], - [[274.41442381, 288.1640568], [278.16316139, 291.91279438]], - ], - dtype=np.float32, - ) - expected_sd.data = np.array( - [ - [[0.36048627, 0.50103523], [0.48562504, 0.626174]], - [[0.36048627, 0.50103523], [0.48562504, 0.626174]], - ], - dtype=np.float32, - ) - - assert result_mean == expected_mean - assert result_sd == expected_sd - - def test_climate_anomaly_emos(): """Test that the climate_anomaly_emos method returns the expected results.""" # Skip test if pyGAM not available. @@ -229,9 +147,9 @@ def test_process(include_altitude): "emos_coefficient_delta", ] if include_altitude: - expected_data = [0.0450892, -0.1451314, 0.0, 1.05] + expected_data = [0.02605934, -0.11531556, 0.00025, 1.0] else: - expected_data = [-0.05576817, 0.08254312, 0.0, 1.0012305] + expected_data = [0.02422589, -0.11010934, 0.0, 1.0] for i, cube in enumerate(result): assert expected_names[i] == cube.name() diff --git a/improver_tests/calibration/samos_calibration/test_TrainGAMsForSAMOS.py b/improver_tests/calibration/samos_calibration/test_TrainGAMsForSAMOS.py index 41a726d03e..b5b15c56e8 100644 --- a/improver_tests/calibration/samos_calibration/test_TrainGAMsForSAMOS.py +++ b/improver_tests/calibration/samos_calibration/test_TrainGAMsForSAMOS.py @@ -286,7 +286,6 @@ def test_calculate_cube_statistics_exception(model_specification): "coordinate. The increments between points in the time coordinate " "were: \\[86400 86401\\]. The smallest increment was: 86400." 
) - with pytest.raises(ValueError, match=msg): TrainGAMsForSAMOS( model_specification=model_specification @@ -300,49 +299,49 @@ def test_calculate_cube_statistics_exception(model_specification): False, [["linear", [0], {}], ["linear", [1], {}]], 11, - np.array([288.14942357, 0.50000486], dtype=np.float64), + np.array([288.1492261877016, 0.5494898838013565], dtype=np.float64), ], [ True, [["linear", [0], {}], ["linear", [1], {}]], 11, - np.array([288.16681245, 0.50060569], dtype=np.float64), + np.array([288.16844872649284, 0.5496814185908583], dtype=np.float64), ], [ False, [["tensor", [0, 1], {}]], 11, - np.array([288.13047912, 0.48075810], dtype=np.float64), + np.array([288.1285965937152, 0.5290237015452911], dtype=np.float64), ], [ True, [["tensor", [0, 1], {}]], 11, - np.array([288.13571987, 0.48000211], dtype=np.float64), + np.array([288.13439901991666, 0.5280646018499808], dtype=np.float64), ], [ False, [["linear", [0], {}], ["linear", [1], {}]], 1, - np.array([288.16497963, 0.51062603], dtype=np.float64), + np.array([288.1659800896903, 0.561020575955558], dtype=np.float64), ], [ True, [["linear", [0], {}], ["linear", [1], {}]], 1, - np.array([288.15651954, 0.49548990], dtype=np.float64), + np.array([288.15797722833526, 0.5427105296998994], dtype=np.float64), ], [ False, [["tensor", [0, 1], {}]], 1, - np.array([287.99195197, 0.46191877], dtype=np.float64), + np.array([287.97628879844785, 0.5074109794042518], dtype=np.float64), ], [ True, [["tensor", [0, 1], {}]], 1, - np.array([287.97881919, 0.44321410], dtype=np.float64), + np.array([287.9622424769424, 0.48765295694648797], dtype=np.float64), ], ], ) diff --git a/improver_tests/calibration/samos_calibration/test_samos_utilities.py b/improver_tests/calibration/samos_calibration/test_samos_utilities.py index 3516b157e3..27c999b3cd 100644 --- a/improver_tests/calibration/samos_calibration/test_samos_utilities.py +++ b/improver_tests/calibration/samos_calibration/test_samos_utilities.py @@ -11,11 +11,13 @@ import numpy as np import pandas as pd import pytest -from iris.cube import Cube +from iris.cube import Cube, CubeList from pandas.testing import assert_frame_equal from improver.calibration.samos_calibration import ( + TrainGAMsForSAMOS, convert_dataframe_to_cube, + get_climatological_stats, prepare_data_for_gam, ) from improver.synthetic_data.set_up_test_cubes import ( @@ -23,6 +25,7 @@ set_up_variable_cube, ) from improver_tests.calibration.samos_calibration.helper_functions import ( + create_cubes_for_gam_fitting, create_simple_cube, ) @@ -163,7 +166,7 @@ def test_prepare_data_for_gam_gridded( set_up_kwargs=set_up_kwargs, ) - additional_cubes = [] + additional_cubes = CubeList() if include_altitude: additional_cubes.append(altitude_cube("gridded", set_up_kwargs)) surface_altitude = np.array([10.0, 20.0, 20.0, 10.0] * 2, dtype=np.float32) @@ -194,17 +197,23 @@ def test_prepare_data_for_gam_spot( fill_value=305.0, ) - additional_cubes = [] + additional_cubes = CubeList() if include_altitude: additional_cubes.append(altitude_cube("spot")) surface_altitude = np.array([10.0, 20.0] * 2, dtype=np.float32) spot_dataframe["surface_altitude"] = surface_altitude if include_land_fraction: - additional_cubes.append(land_fraction_cube("spot")) + land_fraction_cube_spot = land_fraction_cube("spot") + land_fraction_cube_spot.coord("wmo_id").points = np.array( + ["00000", "00003", "00002", "00001"], dtype="<U5" + ) + expect_emos_additional_fields = True if len(emos_feature_names) > 0 else False + + ( + result_forecast_cube, + result_truth_cube, + result_gam_additional_fields, + result_emos_coefficients, +
result_emos_additional_fields, + result_prob_template, + ) = split_cubes_for_samos( + merged_input_cubelist, + gam_features=gam_features, + truth_attribute="truth_attribute=truth", + expect_emos_coeffs=False, + expect_emos_fields=expect_emos_additional_fields, + ) + + # Check that the output forecast cube is as expected. + assert isinstance(result_forecast_cube, iris.cube.Cube) + assert result_forecast_cube.shape == (2, 3, 3) + assert "forecast_reference_time" in [ + c.name() for c in result_forecast_cube.coords() + ] + assert "forecast_period" in [c.name() for c in result_forecast_cube.coords()] + + # Check that the output truth cube is as expected. + assert isinstance(result_truth_cube, iris.cube.Cube) + assert result_truth_cube.shape == (2, 3, 3) + assert "forecast_reference_time" not in [ + c.name() for c in result_truth_cube.coords() + ] + assert "forecast_period" not in [c.name() for c in result_truth_cube.coords()] + assert result_truth_cube.attributes["truth_attribute"] == "truth" + + # Check that the output GAM additional fields cubes are as expected. + if len(gam_features) > 0: + assert len(result_gam_additional_fields) == len(gam_features) + for cube in result_gam_additional_fields: + assert cube.name() in gam_features + else: + assert result_gam_additional_fields is None + + # Check that the output EMOS additional fields cubes are as expected. + if len(emos_feature_names) > 0: + assert len(result_emos_additional_fields) == len(emos_feature_names) + for cube in result_emos_additional_fields: + assert cube.name() in emos_feature_names + else: + assert result_emos_additional_fields is None + + # Check all other outputs are None. + assert result_emos_coefficients is None + assert result_prob_template is None + + +@pytest.mark.parametrize("n_prob_templates", [1, 2]) +def test_split_cubes_for_samos_prob_template( + forecast_cubes_multi_time, + truth_cubes_multi_time, + probability_forecast_cubes_multi_time, + n_prob_templates, +): + """Test that the split_cubes_for_samos function correctly separates out input + cubes when a probability template cube is provided.""" + merged_input_cubelist = truth_cubes_multi_time.copy() + merged_input_cubelist.extend(forecast_cubes_multi_time) + + if n_prob_templates == 1: + merged_input_cubelist.append(probability_forecast_cubes_multi_time[0]) + + ( + result_forecast_cube, + result_truth_cube, + result_gam_additional_fields, + result_emos_coefficients, + result_emos_additional_fields, + result_prob_template, + ) = split_cubes_for_samos( + merged_input_cubelist, + gam_features=[], + truth_attribute="truth_attribute=truth", + expect_emos_coeffs=True, + expect_emos_fields=True, + ) + + # Check that the output forecast cube is as expected. + assert isinstance(result_forecast_cube, iris.cube.Cube) + assert result_forecast_cube.shape == (2, 3, 3) + assert "forecast_reference_time" in [ + c.name() for c in result_forecast_cube.coords() + ] + assert "forecast_period" in [c.name() for c in result_forecast_cube.coords()] + + # Check that the output truth cube is as expected. + assert isinstance(result_truth_cube, iris.cube.Cube) + assert result_truth_cube.shape == (2, 3, 3) + assert "forecast_reference_time" not in [ + c.name() for c in result_truth_cube.coords() + ] + assert "forecast_period" not in [c.name() for c in result_truth_cube.coords()] + assert result_truth_cube.attributes["truth_attribute"] == "truth" + + # Check that the output probability template cube is as expected. 
+ assert isinstance(result_prob_template, iris.cube.Cube) + assert result_prob_template.shape == (2, 3, 3) + assert "wind_speed" in [c.name() for c in result_prob_template.coords()] + assert "forecast_reference_time" in [ + c.name() for c in result_prob_template.coords() + ] + assert "forecast_period" in [c.name() for c in result_prob_template.coords()] + + # Check all other outputs are None. + assert result_gam_additional_fields is None + assert result_emos_coefficients is None + assert result_emos_additional_fields is None + + else: + probability_cubes = probability_forecast_cubes_multi_time.copy() + probability_cubes[1].rename("probability_of_air_temperature_above_threshold") + probability_cubes[1].coord("wind_speed").rename("air_temperature") + merged_input_cubelist.extend(probability_cubes) + + with pytest.raises( + IOError, match="Providing multiple probability cubes is not supported." + ): + split_cubes_for_samos( + merged_input_cubelist, + gam_features=[], + truth_attribute="truth_attribute=truth", + expect_emos_coeffs=True, + expect_emos_fields=True, + ) + + +@pytest.mark.skipif(not pyarrow_installed, reason="pyarrow not installed") +@pytest.mark.parametrize( + "include_forecast,include_truth", [(True, False), (False, True), (True, True)] +) +def test_identify_parquet_type_basic(tmp_path, include_forecast, include_truth): + """Test that this function correctly splits out input parquet files into forecasts + and truths.""" + merged_input_paths = [] + if include_forecast: + forecast_dir = create_multi_site_forecast_parquet_file( + tmp_path=tmp_path, + include_forecast_period=True, + ) + merged_input_paths.append(Path(forecast_dir)) + if include_truth: + truth_dir = create_multi_site_forecast_parquet_file( + tmp_path=tmp_path, + include_forecast_period=False, + ) + merged_input_paths.append(Path(truth_dir)) + + ( + result_forecast_paths, + result_truth_paths, + ) = identify_parquet_type(merged_input_paths) + + assert result_forecast_paths == ( + merged_input_paths[0] if include_forecast else None + ) + assert result_truth_paths == (merged_input_paths[-1] if include_truth else None) + + +@pytest.mark.skipif(not pyarrow_installed, reason="pyarrow not installed") +@pytest.mark.parametrize("include_netcdf", [True, False]) +@pytest.mark.parametrize("include_parquet", [True, False]) +@pytest.mark.parametrize("n_pickles", [0, 1, 2]) +def test_split_netcdf_parquet_pickle_basic( + tmp_path, forecast_cubes_multi_time, include_netcdf, include_parquet, n_pickles +): + """Test that this function correctly splits out input files into pickle, parquet + and netcdf files.""" + pytest.importorskip("pygam") + + merged_input_paths = [] + + if include_parquet: + forecast_dir = create_multi_site_forecast_parquet_file( + tmp_path=tmp_path, + include_forecast_period=True, + ) + merged_input_paths.append(Path(forecast_dir)) + + truth_dir = create_multi_site_forecast_parquet_file( + tmp_path=tmp_path, + include_forecast_period=False, + ) + merged_input_paths.append(Path(truth_dir)) + + if include_netcdf: + test_cube, netcdf_path = create_netcdf_file( + tmp_path=tmp_path, + ) + merged_input_paths.append(Path(netcdf_path)) + + if n_pickles > 0: + pickle_object, pickle_path = create_pickle_file( + tmp_path=tmp_path, filename="data1.pkl" + ) + merged_input_paths.append(Path(pickle_path)) + + if n_pickles == 2: + pickle_object, pickle_path = create_pickle_file( + tmp_path=tmp_path, filename="data2.pkl" + ) + merged_input_paths.append(Path(pickle_path)) + + msg = "Multiple pickle inputs have been provided. 
Only one is expected." + with pytest.raises(ValueError, match=msg): + split_netcdf_parquet_pickle(merged_input_paths) + else: + ( + result_cube, + result_parquets, + result_pickles, + ) = split_netcdf_parquet_pickle(merged_input_paths) + + if include_netcdf: + assert len(result_cube) == 1 + result_cube = result_cube[0] + # The save/load process adds a Conventions global attribute to the cube. + test_cube.attributes.globals["Conventions"] = "CF-1.7" + assert result_cube == test_cube + else: + assert result_cube is None + + if include_parquet: + # Parquets are returned as filepaths. + assert result_parquets == merged_input_paths[0:2] + else: + assert result_parquets is None + + if n_pickles == 1: + # The pickled object is a list containing 2 pygam GAMs. + for i in range(len(result_pickles)): + for key in ["max_iter", "tol", "fit_intercept"]: + assert ( + result_pickles[i].get_params()[key] + == pickle_object[i].get_params()[key] + ) + assert result_pickles[i].terms == pickle_object[i].terms + assert result_pickles[i].distribution == pickle_object[i].distribution + assert result_pickles[i].link == pickle_object[i].link + else: + assert result_pickles is None if __name__ == "__main__": diff --git a/improver_tests/calibration/utilities/test_utilities.py b/improver_tests/calibration/utilities/test_utilities.py index 9e6d6ff6ba..db2b7b1e9c 100644 --- a/improver_tests/calibration/utilities/test_utilities.py +++ b/improver_tests/calibration/utilities/test_utilities.py @@ -9,10 +9,15 @@ """ import datetime +import importlib +import re import unittest +from pathlib import Path import iris import numpy as np +import pandas as pd +import pytest from iris.coords import AuxCoord, DimCoord from iris.cube import CubeList from iris.tests import IrisTest @@ -25,23 +30,30 @@ check_forecast_consistency, check_predictor, convert_cube_data_to_2d, + convert_parquet_to_cube, create_unified_frt_coord, filter_non_matching_cubes, flatten_ignoring_masked_data, forecast_coords_match, get_frt_hours, merge_land_and_sea, + prepare_cube_no_calibration, ) from improver.metadata.constants.time_types import TIME_COORDS from improver.synthetic_data.set_up_test_cubes import ( add_coordinate, set_up_percentile_cube, + set_up_probability_cube, set_up_variable_cube, ) from improver.utilities.cube_manipulation import sort_coord_in_cube from ..emos_calibration.helper_functions import SetupCubes +pyarrow_installed = True +if not importlib.util.find_spec("pyarrow"): + pyarrow_installed = False + class Test_convert_cube_data_to_2d(IrisTest): """Test the convert_cube_data_to_2d utility.""" @@ -914,5 +926,374 @@ def test_spot_point_by_point_nans_alternative_proportion(self): ) +@pytest.fixture +def forecast_cube(): + return set_up_variable_cube( + data=np.array( + [[1.0, 2.0, 2.0], [2.0, 1.0, 3.0], [1.0, 3.0, 3.0]], dtype=np.float32 + ), + name="wind_speed", + units="m/s", + ) + + +@pytest.fixture +def forecast_percentile_cube(): + return set_up_percentile_cube( + data=np.array( + [ + [[1.0, 2.0, 2.0], [2.0, 1.0, 3.0], [1.0, 3.0, 3.0]], + [[3.0, 4.0, 4.0], [4.0, 3.0, 5.0], [3.0, 5.0, 5.0]], + ], + dtype=np.float32, + ), + percentiles=np.array([50, 60], dtype=np.float32), + name="wind_speed", + units="m/s", + ) + + +@pytest.fixture +def prob_template(): + return set_up_probability_cube( + data=np.ones((2, 3, 3), dtype=np.float32), + thresholds=[1.5, 2.5], + variable_name="wind_speed", + ) + + +@pytest.fixture +def emos_coefficient_cubes(): + # Set-up coefficient cubes + fp_names = ["wind_speed"] + predictor_index = DimCoord( + 
np.array(range(len(fp_names)), dtype=np.int32), + long_name="predictor_index", + units="1", + ) + dim_coords_and_dims = ((predictor_index, 0),) + predictor_name = AuxCoord(fp_names, long_name="predictor_name", units="no_unit") + aux_coords_and_dims = ((predictor_name, 0),) + + attributes = { + "diagnostic_standard_name": "wind_speed", + "distribution": "norm", + } + alpha = iris.cube.Cube( + np.array(0, dtype=np.float32), + long_name="emos_coefficients_alpha", + units="K", + attributes=attributes, + ) + beta = iris.cube.Cube( + np.array([0.5], dtype=np.float32), + long_name="emos_coefficients_beta", + units="1", + attributes=attributes, + dim_coords_and_dims=dim_coords_and_dims, + aux_coords_and_dims=aux_coords_and_dims, + ) + gamma = iris.cube.Cube( + np.array(0, dtype=np.float32), + long_name="emos_coefficients_gamma", + units="K", + attributes=attributes, + ) + delta = iris.cube.Cube( + np.array(1, dtype=np.float32), + long_name="emos_coefficients_delta", + units="1", + attributes=attributes, + ) + + return CubeList([alpha, beta, gamma, delta]) + + +def create_multi_forecast_period_forecast_parquet_file(tmp_path): + """Create a parquet file with multi-forecast period forecast data.""" + + data_dict = { + "percentile": [50, 50, 50, 50], + "forecast": [277, 270, 280, 269], + "altitude": [10, 83, 10, 83], + "blend_time": [pd.Timestamp("2017-01-02 00:00:00", tz="utc")] * 4, + "forecast_period": np.repeat( + [ + [pd.Timedelta(int(6 * 3.6e3), unit="seconds")], + [pd.Timedelta(int(12 * 3.6e3), unit="seconds")], + ], + 2, + ), + "forecast_reference_time": [pd.Timestamp("2017-01-02 00:00:00", tz="utc")] * 4, + "latitude": [60.1, 59.9, 60.1, 59.9], + "longitude": [1, 2, 1, 2], + "time": np.repeat( + [ + pd.Timestamp("2017-01-02 06:00:00", tz="utc"), + pd.Timestamp("2017-01-02 12:00:00", tz="utc"), + ], + 2, + ), + "wmo_id": ["03001", "03002", "03001", "03002"], + "station_id": ["03001", "03002", "03001", "03002"], + "cf_name": ["air_temperature"] * 4, + "units": ["K"] * 4, + "experiment": ["latestblend"] * 4, + "period": [pd.NaT] * 4, + "height": [1.5] * 4, + "diagnostic": ["temperature_at_screen_level"] * 4, + } + # Add wind speed to demonstrate filtering. 
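+ # Note: dict.copy() is shallow, but every key changed below is rebound to a + # new object, so the temperature data_dict above is left untouched.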
+ wind_speed_dict = data_dict.copy() + wind_speed_dict["forecast"] = [6, 16, 12, 15] + wind_speed_dict["cf_name"] = "wind_speed" + wind_speed_dict["units"] = "m s-1" + wind_speed_dict["diagnostic"] = "wind_speed_at_10m" + data_df = pd.DataFrame(data_dict) + wind_speed_df = pd.DataFrame(wind_speed_dict) + joined_df = pd.concat([data_df, wind_speed_df], ignore_index=True) + + output_dir = tmp_path / "forecast_parquet_files" + output_dir.mkdir(parents=True, exist_ok=True) + output_path = str(output_dir / "forecast.parquet") + joined_df.to_parquet(output_path, index=False, engine="pyarrow") + + return data_df, output_dir + + +def create_multi_forecast_period_truth_parquet_file(tmp_path): + """Create a parquet file with multi-forecast period truth data.""" + data_dict = { + "diagnostic": ["temperature_at_screen_level"] * 4, + "latitude": [60.1, 59.9, 60.1, 59.9], + "longitude": [1, 2, 1, 2], + "altitude": [10, 83, 10, 83], + "time": np.repeat( + [ + pd.Timestamp("2017-01-02 06:00:00", tz="utc"), + pd.Timestamp("2017-01-02 12:00:00", tz="utc"), + ], + 2, + ), + "wmo_id": ["03001", "03002", "03001", "03002"], + "ob_value": [280, 273, 284, 275], + } + wind_speed_dict = data_dict.copy() + wind_speed_dict["ob_value"] = [2, 11, 10, 14] + wind_speed_dict["diagnostic"] = "wind_speed_at_10m" + data_df = pd.DataFrame(data_dict) + wind_speed_df = pd.DataFrame(wind_speed_dict) + joined_df = pd.concat([data_df, wind_speed_df], ignore_index=True) + + output_dir = tmp_path / "truth_parquet_files" + output_dir.mkdir(parents=True, exist_ok=True) + output_path = str(output_dir / "truth.parquet") + joined_df.to_parquet(output_path, index=False, engine="pyarrow") + return data_df, output_dir + + +@pytest.mark.parametrize( + "include_coeffs,validity_times", + [ + [True, None], + [True, ["0400"]], # matches forecast cube validity time + [True, ["0500"]], # does not match forecast cube validity time + [False, None], + ], +) +def test_prepare_cube_no_calibration_basic( + forecast_cube, emos_coefficient_cubes, include_coeffs, validity_times +): + """Test that the function returns the expected result when coefficients are + provided with and without using the validity_time argument. + """ + result = prepare_cube_no_calibration( + forecast_cube, + emos_coefficients=emos_coefficient_cubes if include_coeffs else None, + validity_times=validity_times, + ) + + if not include_coeffs or validity_times == ["0500"]: + # No matching coefficients so should return the input cube with some + # additional metadata. + assert isinstance(result, iris.cube.Cube) + assert result.coords() == forecast_cube.coords() + assert np.all(result.data == forecast_cube.data) + assert result.attributes["comment"] == ( + "Warning: Calibration of this forecast has been attempted, however, no " + "calibration has been applied." + ) + else: + assert result is None + + +@pytest.mark.parametrize( + "include_coeffs,validity_times", + [ + [True, None], + [True, ["0400"]], # matches forecast cube validity time + [True, ["0500"]], # does not match forecast cube validity time + [False, None], + ], +) +def test_prepare_cube_no_calibration_prob_template( + forecast_cube, emos_coefficient_cubes, prob_template, include_coeffs, validity_times +): + """Test that the function returns the expected result when coefficients and a + probability template are provided with and without using the validity_time argument. 
+ """ + result = prepare_cube_no_calibration( + forecast_cube, + emos_coefficients=emos_coefficient_cubes if include_coeffs else None, + validity_times=validity_times, + prob_template=prob_template, + ) + + if not include_coeffs or validity_times == ["0500"]: + # No matching coefficients so should return the probability template cube with + # some additional metadata. + assert isinstance(result, iris.cube.Cube) + assert result.coords() == prob_template.coords() + assert np.all(result.data == prob_template.data) + assert result.attributes["comment"] == ( + "Warning: Calibration of this forecast has been attempted, however, no " + "calibration has been applied." + ) + else: + assert result is None + + +@pytest.mark.skipif(not pyarrow_installed, reason="pyarrow not installed") +@pytest.mark.parametrize( + "include_coeffs,validity_times", + [ + [True, None], + [True, ["0400"]], # matches forecast cube validity time + [True, ["0500"]], # does not match forecast cube validity time + [False, None], + ], +) +def test_prepare_cube_no_calibration_percentiles( + forecast_percentile_cube, emos_coefficient_cubes, include_coeffs, validity_times +): + """Test that the function returns the expected result when coefficients and a set + of percentiles are provided.""" + percentiles = [55] + + expected = set_up_percentile_cube( + data=np.array( + [ + [[2.0, 3.0, 3.0], [3.0, 2.0, 4.0], [2.0, 4.0, 4.0]], + ], + dtype=np.float32, + ), + percentiles=np.array(percentiles, dtype=np.float32), + name="wind_speed", + units="m/s", + ) + + result = prepare_cube_no_calibration( + forecast_percentile_cube, + emos_coefficients=emos_coefficient_cubes if include_coeffs else None, + validity_times=validity_times, + percentiles=percentiles, + ) + + if not include_coeffs or validity_times == ["0500"]: + # No matching coefficients so should return a cube with percentiles + # resampled. + assert isinstance(result, iris.cube.Cube) + assert result.coords() == expected.coords() + assert np.all(result.data == expected.data) + assert result.attributes["comment"] == ( + "Warning: Calibration of this forecast has been attempted, however, no " + "calibration has been applied." + ) + else: + assert result is None + + +@pytest.mark.skipif(not pyarrow_installed, reason="pyarrow not installed") +@pytest.mark.parametrize("cycletime", ["20170103T0000Z", "20170104T0000Z"]) +def test_convert_parquet_to_cube_basic(tmp_path, cycletime): + """Test that this function returns the expected cubes when provided with valid + inputs.""" + fcs_df, fcs_path = create_multi_forecast_period_forecast_parquet_file(tmp_path) + + truth_df, truth_path = create_multi_forecast_period_truth_parquet_file(tmp_path) + + fcs_cube, truth_cube = convert_parquet_to_cube( + Path(fcs_path), + Path(truth_path), + forecast_period=6 * 3.6e3, # seconds + cycletime=cycletime, + training_length=1, + diagnostic="temperature_at_screen_level", + percentiles=[50], + experiment="latestblend", + ) + + if cycletime == "20170103T0000Z": + # There is valid training data in the dataframes for this cycletime. 
+ assert isinstance(fcs_cube, iris.cube.Cube) + assert isinstance(truth_cube, iris.cube.Cube) + + for cube in [fcs_cube, truth_cube]: + assert cube.name() == "air_temperature" + assert ( + cube.coord("time").points + == pd.Timestamp("2017-01-02 06:00:00", tz="utc").timestamp() + ) + np.testing.assert_array_almost_equal( + cube.coord("altitude").points, np.array([10, 83], dtype=np.float32) + ) + np.testing.assert_array_almost_equal( + cube.coord("latitude").points, np.array([60.1, 59.9], dtype=np.float32) + ) + np.testing.assert_array_almost_equal( + cube.coord("longitude").points, np.array([1, 2], dtype=np.float32) + ) + np.testing.assert_array_equal( + cube.coord("wmo_id").points, np.array(["03001", "03002"]) + ) + + assert ( + fcs_cube.coord("forecast_reference_time").points + == pd.Timestamp("2017-01-02 00:00:00", tz="utc").timestamp() + ) + assert fcs_cube.coord("forecast_period").points == 6 * 3.6e3 + else: + # There is no valid training data in the dataframes for this cycletime. + assert fcs_cube is None + assert truth_cube is None + + +@pytest.mark.skipif(not pyarrow_installed, reason="pyarrow not installed") +def test_convert_parquet_to_cube_exception(tmp_path): + """Test that the correct exception is raised when filtering returns an empty truth + dataframe.""" + fcs_df, fcs_path = create_multi_forecast_period_forecast_parquet_file(tmp_path) + + truth_df, truth_path = create_multi_forecast_period_truth_parquet_file(tmp_path) + + # This input diagnostic does not exist in the forecast or truth dataframes. + diagnostic = "lwe_precipitation_rate" + msg = re.escape( + f"The requested filepath {truth_path} does not contain the requested contents: " + "[[('diagnostic', '==', 'lwe_precipitation_rate')]]" + ) + with pytest.raises(IOError, match=msg): + convert_parquet_to_cube( + Path(fcs_path), + Path(truth_path), + forecast_period=6 * 3.6e3, # seconds + cycletime="20170103T0000Z", + training_length=1, + diagnostic=diagnostic, + percentiles=[50], + experiment="latestblend", + ) + + if __name__ == "__main__": unittest.main()
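Reviewer note: the identify_parquet_type tests above rely on forecast parquet tables carrying a forecast_period column that truth tables lack. The sketch below illustrates that schema check; it assumes a single *.parquet file per directory, and classify_parquet_dirs is an illustrative name rather than the IMPROVER implementation.

    from pathlib import Path
    from typing import Iterable, Optional, Tuple

    import pyarrow.parquet as pq


    def classify_parquet_dirs(
        paths: Iterable[Path],
    ) -> Tuple[Optional[Path], Optional[Path]]:
        """Split parquet directories into (forecast, truth) by schema.

        Forecast tables include a forecast_period column; truth tables do not.
        """
        forecast_dir = truth_dir = None
        for path in paths:
            # Inspect the schema of the first parquet file in the directory;
            # read_schema only reads file metadata, not the row data.
            sample = next(Path(path).glob("*.parquet"), None)
            if sample is None:
                continue
            if "forecast_period" in pq.read_schema(sample).names:
                forecast_dir = Path(path)
            else:
                truth_dir = Path(path)
        return forecast_dir, truth_dir

Because only the parquet footer is inspected, this classification stays cheap even for large training tables.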