diff --git a/ndsl/stencils/testing/conftest.py b/ndsl/stencils/testing/conftest.py
index 2ed22fee..474cdb98 100644
--- a/ndsl/stencils/testing/conftest.py
+++ b/ndsl/stencils/testing/conftest.py
@@ -91,6 +91,12 @@ def pytest_addoption(parser):
         default=False,
         help="Use the multi-modal float metric. Default to False.",
     )
+    parser.addoption(
+        "--sort_report",
+        action="store",
+        default="ulp",
+        help='Sort the report by "index" (ascending) or along the metric: "ulp", "absolute", "relative" (descending). Default to "ulp"',
+    )
 
 
 def pytest_configure(config):
@@ -237,6 +243,7 @@ def sequential_savepoint_cases(metafunc, data_path, namelist_filename, *, backen
     savepoint_to_replay = get_savepoint_restriction(metafunc)
     grid_mode = metafunc.config.getoption("grid")
     topology_mode = metafunc.config.getoption("topology")
+    sort_report = metafunc.config.getoption("sort_report")
     return _savepoint_cases(
         savepoint_names,
         ranks,
@@ -247,6 +254,7 @@ def sequential_savepoint_cases(metafunc, data_path, namelist_filename, *, backen
         data_path,
         grid_mode,
         topology_mode,
+        sort_report=sort_report,
     )
 
 
@@ -260,6 +268,7 @@ def _savepoint_cases(
     data_path: str,
     grid_mode: str,
     topology_mode: bool,
+    sort_report: str,
 ):
     return_list = []
     for rank in ranks:
@@ -309,10 +318,10 @@ def _savepoint_cases(
                     SavepointCase(
                         savepoint_name=test_name,
                         data_dir=data_path,
-                        rank=rank,
                         i_call=i_call,
                         testobj=testobj,
                         grid=grid,
+                        sort_report=sort_report,
                     )
                 )
     return return_list
@@ -333,6 +342,7 @@ def parallel_savepoint_cases(
 ):
     namelist = get_namelist(namelist_filename)
     topology_mode = metafunc.config.getoption("topology")
+    sort_report = metafunc.config.getoption("sort_report")
     communicator = get_communicator(comm, namelist.layout, topology_mode)
     stencil_config = get_config(backend, communicator)
     savepoint_names = get_parallel_savepoint_names(metafunc, data_path)
@@ -348,6 +358,7 @@ def parallel_savepoint_cases(
         data_path,
         grid_mode,
         topology_mode,
+        sort_report=sort_report,
     )
 
 
@@ -416,6 +427,11 @@ def multimodal_metric(pytestconfig):
     return bool(pytestconfig.getoption("multimodal_metric"))
 
 
+@pytest.fixture()
+def sort_report(pytestconfig):
+    return pytestconfig.getoption("sort_report")  # dest of `--sort_report`
+
+
 @pytest.fixture()
 def grid(pytestconfig):
     return pytestconfig.getoption("grid")
diff --git a/ndsl/stencils/testing/parallel_translate.py b/ndsl/stencils/testing/parallel_translate.py
index e0669994..7df16a17 100644
--- a/ndsl/stencils/testing/parallel_translate.py
+++ b/ndsl/stencils/testing/parallel_translate.py
@@ -17,6 +17,9 @@
 class ParallelTranslate:
     max_error = TranslateFortranData2Py.max_error
     near_zero = TranslateFortranData2Py.near_zero
+    mmr_absolute_eps = TranslateFortranData2Py.mmr_absolute_eps
+    mmr_relative_fraction = TranslateFortranData2Py.mmr_relative_fraction
+    mmr_ulp = TranslateFortranData2Py.mmr_ulp
     compute_grid_option = False
     tests_grid = False
     inputs: Dict[str, Any] = {}
diff --git a/ndsl/stencils/testing/savepoint.py b/ndsl/stencils/testing/savepoint.py
index 7571befb..bd6ef995 100644
--- a/ndsl/stencils/testing/savepoint.py
+++ b/ndsl/stencils/testing/savepoint.py
@@ -37,13 +37,13 @@ class SavepointCase:
 
     savepoint_name: str
     data_dir: str
-    rank: int
     i_call: int
     testobj: Translate
     grid: Grid
+    sort_report: str
 
     def __str__(self):
-        return f"{self.savepoint_name}-rank={self.rank}-call={self.i_call}"
+        return f"{self.savepoint_name}-rank={self.grid.rank}-call={self.i_call}"
 
     @property
     def exists(self) -> bool:
@@ -51,14 +51,14 @@ def exists(self) -> bool:
             xr.open_dataset(
                 os.path.join(self.data_dir, f"{self.savepoint_name}-In.nc")
             ).sizes["rank"]
-            > self.rank
+            > self.grid.rank
         )
 
     @property
     def ds_in(self) -> xr.Dataset:
         return (
             xr.open_dataset(os.path.join(self.data_dir, f"{self.savepoint_name}-In.nc"))
-            .isel(rank=self.rank)
+            .isel(rank=self.grid.rank)
             .isel(savepoint=self.i_call)
         )
 
@@ -68,6 +68,6 @@ def ds_out(self) -> xr.Dataset:
             xr.open_dataset(
                 os.path.join(self.data_dir, f"{self.savepoint_name}-Out.nc")
             )
-            .isel(rank=self.rank)
+            .isel(rank=self.grid.rank)
             .isel(savepoint=self.i_call)
         )
diff --git a/ndsl/stencils/testing/test_translate.py b/ndsl/stencils/testing/test_translate.py
index 70480c16..5851a636 100644
--- a/ndsl/stencils/testing/test_translate.py
+++ b/ndsl/stencils/testing/test_translate.py
@@ -178,7 +178,7 @@ def test_sequential_savepoint(
     if case.testobj.skip_test:
         return
     if not case.exists:
-        pytest.skip(f"Data at rank {case.rank} does not exists")
+        pytest.skip(f"Data at rank {case.grid.rank} does not exists")
     input_data = dataset_to_dict(case.ds_in)
     input_names = (
         case.testobj.serialnames(case.testobj.in_vars["data_vars"])
@@ -218,6 +218,7 @@ def test_sequential_savepoint(
                     absolute_eps_override=case.testobj.mmr_absolute_eps,
                     relative_fraction_override=case.testobj.mmr_relative_fraction,
                     ulp_override=case.testobj.mmr_ulp,
+                    sort_report=case.sort_report,
                 )
             else:
                 metric = LegacyMetric(
@@ -234,7 +235,7 @@ def test_sequential_savepoint(
         ref_data_out[varname] = [ref_data]
 
     # Reporting & data save
-    _report_results(case.savepoint_name, results)
+    _report_results(case.savepoint_name, case.grid.rank, results)
     if len(failing_names) > 0:
         get_thresholds(case.testobj, input_data=original_input_data)
         os.makedirs(OUTDIR, exist_ok=True)
@@ -341,7 +342,7 @@ def test_parallel_savepoint(
     if (grid == "compute") and not case.testobj.compute_grid_option:
         pytest.xfail(f"Grid compute option not used for test {case.savepoint_name}")
     if not case.exists:
-        pytest.skip(f"Data at rank {case.rank} does not exists")
+        pytest.skip(f"Data at rank {case.grid.rank} does not exists")
     input_data = dataset_to_dict(case.ds_in)
     # run python version of functionality
     output = case.testobj.compute_parallel(input_data, communicator)
@@ -368,9 +369,12 @@ def test_parallel_savepoint(
                 metric = MultiModalFloatMetric(
                     reference_values=ref_data[varname][0],
                     computed_values=output_data,
-                    eps=case.testobj.max_error,
+                    absolute_eps_override=case.testobj.mmr_absolute_eps,
+                    relative_fraction_override=case.testobj.mmr_relative_fraction,
+                    ulp_override=case.testobj.mmr_ulp,
                     ignore_near_zero_errors=ignore_near_zero,
                     near_zero=case.testobj.near_zero,
+                    sort_report=case.sort_report,
                 )
             else:
                 metric = LegacyMetric(
@@ -386,7 +390,7 @@ def test_parallel_savepoint(
             passing_names.append(failing_names.pop())
 
     # Reporting & data save
-    _report_results(case.savepoint_name, results)
+    _report_results(case.savepoint_name, case.grid.rank, results)
     if len(failing_names) > 0:
         os.makedirs(OUTDIR, exist_ok=True)
         nct_filename = os.path.join(
@@ -414,17 +418,23 @@ def test_parallel_savepoint(
         pytest.fail("No tests passed")
 
 
-def _report_results(savepoint_name: str, results: Dict[str, BaseMetric]) -> None:
+def _report_results(
+    savepoint_name: str,
+    rank: int,
+    results: Dict[str, BaseMetric],
+) -> None:
     os.makedirs(OUTDIR, exist_ok=True)
 
     # Summary
-    with open(f"{OUTDIR}/summary-{savepoint_name}.log", "w") as f:
+    with open(f"{OUTDIR}/summary-{savepoint_name}-{rank}.log", "w") as f:
         for varname, metric in results.items():
             f.write(f"{varname}: {metric.one_line_report()}\n")
 
     # Detailed log
     for varname, metric in results.items():
-        log_filename = os.path.join(OUTDIR, f"details-{savepoint_name}-{varname}.log")
+        log_filename = os.path.join(
+            OUTDIR, f"details-{savepoint_name}-{varname}-{rank}.log"
+        )
         metric.report(log_filename)
 
 
@@ -434,16 +444,20 @@ def save_netcdf(
     inputs_list: List[Dict[str, List[np.ndarray]]],
     output_list: List[Dict[str, List[np.ndarray]]],
     ref_data: Dict[str, List[np.ndarray]],
-    failing_names,
+    failing_names: List[str],
     out_filename,
 ):
     import xarray as xr
 
     data_vars = {}
-    for i, varname in enumerate(failing_names):
+    indices = np.argsort(failing_names)
+    for index in indices:
+        varname = failing_names[index]
         # Read in dimensions and attributes
         if hasattr(testobj, "outputs"):
-            dims = [dim_name + f"_{i}" for dim_name in testobj.outputs[varname]["dims"]]
+            dims = [
+                dim_name + f"_{index}" for dim_name in testobj.outputs[varname]["dims"]
+            ]
             attrs = {"units": testobj.outputs[varname]["units"]}
         else:
             dims = [
diff --git a/ndsl/stencils/testing/translate.py b/ndsl/stencils/testing/translate.py
index e3fc8845..7aae7922 100644
--- a/ndsl/stencils/testing/translate.py
+++ b/ndsl/stencils/testing/translate.py
@@ -51,6 +51,12 @@ def _convert(value: Union[Quantity, np.ndarray]) -> np.ndarray:
 
 
 class TranslateFortranData2Py:
+    """Translate test main class
+
+    The translate test will test a set of inputs and outputs, after having processed
+    the inputs via the user provided `compute_func`.
+    """
+
     max_error = 1e-14
     near_zero = 1e-18
     mmr_absolute_eps = -1
@@ -73,6 +79,8 @@ def setup(self, inputs):
         self.make_storage_data_input_vars(inputs)
 
     def compute_func(self, **inputs):
+        """Compute function to transform the dictionary of `inputs`.
+        Must return a dictionary of updated variables"""
         raise NotImplementedError("Implement a child class compute method")
 
     def compute(self, inputs):
@@ -81,6 +89,10 @@ def compute(self, inputs):
 
     # assume inputs already has been turned into gt4py storages (or Quantities)
     def compute_from_storage(self, inputs):
+        """Run `compute_func` and return an updated `inputs` dictionary with
+        the returned results of `compute_func`.
+
+        Hypothesis: `inputs` are `gt4py.storages`"""
         outputs = self.compute_func(**inputs)
         if outputs is not None:
             inputs.update(outputs)
@@ -109,6 +121,10 @@ def make_storage_data(
         read_only: bool = False,
         full_shape: bool = False,
     ) -> Dict[str, "Field"]:
+        """Copy input data into a gt4py.storage with given shape.
+
+        `array` is copied. Takes care of the device upload if necessary.
+        """
         use_shape = list(self.maxshape)
         if dummy_axes:
             for axis in dummy_axes:
@@ -168,6 +184,8 @@ def collect_start_indices(self, datashape, varinfo):
         return istart, jstart, kstart
 
     def make_storage_data_input_vars(self, inputs, storage_vars=None, dict_4d=True):
+        """From a set of raw inputs, use the `in_vars` dictionary to update inputs to
+        their configured shape."""
         inputs_in = {**inputs}
         inputs_out = {}
         if storage_vars is None:
diff --git a/ndsl/testing/comparison.py b/ndsl/testing/comparison.py
index 5bce02dd..c59f5eab 100644
--- a/ndsl/testing/comparison.py
+++ b/ndsl/testing/comparison.py
@@ -1,9 +1,25 @@
-from typing import List, Optional, Union
+from typing import Any, List, Optional, Union
 
 import numpy as np
 import numpy.typing as npt
 
 
+def _fixed_width_float_16e(value: np.floating[Any]) -> str:
+    """Format `value` as `.16e`, keeping a constant width for any sign."""
+    # The " " sign flag emits a leading space for non-negative values and
+    # "-" for negative ones, so zero is padded like any positive value and
+    # the report columns stay aligned.
+    return f"{value: .16e}"
+
+
+def _fixed_width_float_2e(value: np.floating[Any]) -> str:
+    """Format `value` as `.2e`, keeping a constant width for any sign."""
+    # The " " sign flag emits a leading space for non-negative values and
+    # "-" for negative ones, so zero is padded like any positive value and
+    # the report columns stay aligned.
+    return f"{value: .2e}"
+
+
 class BaseMetric:
     def __init__(
         self,
@@ -210,6 +226,7 @@ def __init__(
         absolute_eps_override: float = -1,
         relative_fraction_override: float = -1,
         ulp_override: float = -1,
+        sort_report: str = "ulp",
         **kwargs,
     ):
         super().__init__(reference_values, computed_values)
@@ -235,6 +252,7 @@ def __init__(
 
         self.success = self._compute_all_metrics()
         self.check = np.all(self.success)
+        self.sort_report = sort_report
 
     def _compute_all_metrics(
         self,
@@ -290,9 +308,9 @@ def _has_override(self) -> bool:
         )
 
     def one_line_report(self) -> str:
-        metric_thresholds = f"{'🔶 ' if not self.absolute_eps.is_default else '' }Absolute E(<{self.absolute_eps.value:.2e})  "
-        metric_thresholds += f"{'🔶 ' if not self.relative_fraction.is_default else '' }Relative E(<{self.relative_fraction.value * 100:.2e}%)   "
-        metric_thresholds += f"{'🔶 ' if not self.ulp_threshold.is_default else '' }ULP E(<{self.ulp_threshold.value})"
+        metric_thresholds = f"{'🔶 ' if not self.absolute_eps.is_default else ''}Absolute E(<{self.absolute_eps.value:.2e})  "
+        metric_thresholds += f"{'🔶 ' if not self.relative_fraction.is_default else ''}Relative E(<{self.relative_fraction.value * 100:.2e}%)   "
+        metric_thresholds += f"{'🔶 ' if not self.ulp_threshold.is_default else ''}ULP E(<{self.ulp_threshold.value})"
         if self.check and self._has_override():
             return f"🔶 No numerical differences with threshold override - metric: {metric_thresholds}"
         elif self.check:
@@ -305,33 +323,51 @@ def one_line_report(self) -> str:
     def report(self, file_path: Optional[str] = None) -> List[str]:
         report = []
         report.append(self.one_line_report())
-        if not self.check:
-            found_indices = np.logical_not(self.success).nonzero()
-            # List all errors to terminal and file
-            bad_indices_count = len(found_indices[0])
-            full_count = len(self.references.flatten())
-            failures_pct = round(100.0 * (bad_indices_count / full_count), 2)
-            report = [
-                f"All failures ({bad_indices_count}/{full_count}) ({failures_pct}%),\n",
-                f"Index   Computed   Reference   "
-                f"{'🔶 ' if not self.absolute_eps.is_default else '' }Absolute E(<{self.absolute_eps.value:.2e})  "
-                f"{'🔶 ' if not self.relative_fraction.is_default else '' }Relative E(<{self.relative_fraction.value * 100:.2e}%)   "
-                f"{'🔶 ' if not self.ulp_threshold.is_default else '' }ULP E(<{self.ulp_threshold.value})",
-            ]
-            # Summary and worst result
-            for iBad in range(bad_indices_count):
-                fi = tuple([f[iBad] for f in found_indices])
-                ulp_dist = (
-                    self.ulp_distance[fi]
-                    if np.isnan(self.ulp_distance[fi])
-                    else int(self.ulp_distance[fi])
-                )
-                report.append(
-                    f"{str(fi)}  {self.computed[fi]:.16e}  {self.references[fi]:.16e}  "
-                    f"{self.absolute_distance[fi]:.2e} {'✅' if self.absolute_distance_metric[fi] else '❌'}  "
-                    f"{self.relative_distance[fi] * 100:.2e} {'✅' if self.relative_distance_metric[fi] else '❌'}  "
-                    f"{ulp_dist:02} {'✅' if self.ulp_distance_metric[fi] else '❌'}  "
-                )
+        failed_indices = np.logical_not(self.success).nonzero()
+        # List all errors to terminal and file
+        bad_indices_count = len(failed_indices[0])
+        full_count = len(self.references.flatten())
+        failures_pct = round(100.0 * (bad_indices_count / full_count), 2)
+        report = [
+            f"All failures ({bad_indices_count}/{full_count}) ({failures_pct}%),\n",
+            f"Index   Computed   Reference   "
+            f"{'🔶 ' if not self.absolute_eps.is_default else ''}Absolute E(<{self.absolute_eps.value:.2e})  "
+            f"{'🔶 ' if not self.relative_fraction.is_default else ''}Relative E(<{self.relative_fraction.value * 100:.2e}%)   "
+            f"{'🔶 ' if not self.ulp_threshold.is_default else ''}ULP E(<{self.ulp_threshold.value})",
+        ]
+        # Flat indices, read in reverse below: metric descending, index ascending
+        if self.sort_report == "ulp":
+            indices_flatten = np.argsort(self.ulp_distance.flatten())
+        elif self.sort_report == "absolute":
+            indices_flatten = np.argsort(self.absolute_distance.flatten())
+        elif self.sort_report == "relative":
+            indices_flatten = np.argsort(self.relative_distance.flatten())
+        elif self.sort_report == "index":
+            indices_flatten = list(range(self.ulp_distance.size - 1, -1, -1))
+        else:
+            raise RuntimeError(
+                f"[Translate test] Unknown {self.sort_report} report sorting option."
+            )
+        for iFlat in indices_flatten[::-1]:
+            fi = np.unravel_index(iFlat, shape=self.ulp_distance.shape)
+            ulp_dist = (
+                self.ulp_distance[fi]
+                if np.isnan(self.ulp_distance[fi])
+                else int(self.ulp_distance[fi])
+            )
+            index_as_string = "("
+            for i in fi:
+                index_as_string += f"{i:02},"
+            index_as_string = index_as_string[:-1]
+            index_as_string += ")"
+            report.append(
+                f"{index_as_string}  "
+                f"{_fixed_width_float_16e(self.computed[fi])}  "
+                f"{_fixed_width_float_16e(self.references[fi])}  "
+                f"{_fixed_width_float_2e(self.absolute_distance[fi])} {'✅' if self.absolute_distance_metric[fi] else '❌'}  "
+                f"{_fixed_width_float_2e(self.relative_distance[fi] * 100)} {'✅' if self.relative_distance_metric[fi] else '❌'}  "
+                f"{ulp_dist:02} {'✅' if self.ulp_distance_metric[fi] else '❌'}  "
+            )
 
         if file_path:
             with open(file_path, "w") as fd: