diff --git a/ndsl/stencils/testing/conftest.py b/ndsl/stencils/testing/conftest.py index 2ed22fee..474cdb98 100644 --- a/ndsl/stencils/testing/conftest.py +++ b/ndsl/stencils/testing/conftest.py @@ -91,6 +91,12 @@ def pytest_addoption(parser): default=False, help="Use the multi-modal float metric. Default to False.", ) + parser.addoption( + "--sort_report", + action="store", + default="ulp", + help='Sort the report by "index" (ascending) or along the metric: "ulp", "absolute", "relative" (descending). Default to "ulp"', + ) def pytest_configure(config): @@ -237,6 +243,7 @@ def sequential_savepoint_cases(metafunc, data_path, namelist_filename, *, backen savepoint_to_replay = get_savepoint_restriction(metafunc) grid_mode = metafunc.config.getoption("grid") topology_mode = metafunc.config.getoption("topology") + sort_report = metafunc.config.getoption("sort_report") return _savepoint_cases( savepoint_names, ranks, @@ -247,6 +254,7 @@ def sequential_savepoint_cases(metafunc, data_path, namelist_filename, *, backen data_path, grid_mode, topology_mode, + sort_report=sort_report, ) @@ -260,6 +268,7 @@ def _savepoint_cases( data_path: str, grid_mode: str, topology_mode: bool, + sort_report: str, ): return_list = [] for rank in ranks: @@ -309,10 +318,10 @@ def _savepoint_cases( SavepointCase( savepoint_name=test_name, data_dir=data_path, - rank=rank, i_call=i_call, testobj=testobj, grid=grid, + sort_report=sort_report, ) ) return return_list @@ -333,6 +342,7 @@ def parallel_savepoint_cases( ): namelist = get_namelist(namelist_filename) topology_mode = metafunc.config.getoption("topology") + sort_report = metafunc.config.getoption("sort_report") communicator = get_communicator(comm, namelist.layout, topology_mode) stencil_config = get_config(backend, communicator) savepoint_names = get_parallel_savepoint_names(metafunc, data_path) @@ -348,6 +358,7 @@ def parallel_savepoint_cases( data_path, grid_mode, topology_mode, + sort_report=sort_report, ) @@ -416,6 +427,11 @@ def 
multimodal_metric(pytestconfig): return bool(pytestconfig.getoption("multimodal_metric")) +@pytest.fixture() +def sort_report(pytestconfig): + return pytestconfig.getoption("sort_report") + + @pytest.fixture() def grid(pytestconfig): return pytestconfig.getoption("grid") diff --git a/ndsl/stencils/testing/parallel_translate.py b/ndsl/stencils/testing/parallel_translate.py index e0669994..7df16a17 100644 --- a/ndsl/stencils/testing/parallel_translate.py +++ b/ndsl/stencils/testing/parallel_translate.py @@ -17,6 +17,9 @@ class ParallelTranslate: max_error = TranslateFortranData2Py.max_error near_zero = TranslateFortranData2Py.near_zero + mmr_absolute_eps = TranslateFortranData2Py.mmr_absolute_eps + mmr_relative_fraction = TranslateFortranData2Py.mmr_relative_fraction + mmr_ulp = TranslateFortranData2Py.mmr_ulp compute_grid_option = False tests_grid = False inputs: Dict[str, Any] = {} diff --git a/ndsl/stencils/testing/savepoint.py b/ndsl/stencils/testing/savepoint.py index 7571befb..bd6ef995 100644 --- a/ndsl/stencils/testing/savepoint.py +++ b/ndsl/stencils/testing/savepoint.py @@ -37,13 +37,13 @@ class SavepointCase: savepoint_name: str data_dir: str - rank: int i_call: int testobj: Translate grid: Grid + sort_report: str def __str__(self): - return f"{self.savepoint_name}-rank={self.rank}-call={self.i_call}" + return f"{self.savepoint_name}-rank={self.grid.rank}-call={self.i_call}" @property def exists(self) -> bool: @@ -51,14 +51,14 @@ def exists(self) -> bool: xr.open_dataset( os.path.join(self.data_dir, f"{self.savepoint_name}-In.nc") ).sizes["rank"] - > self.rank + > self.grid.rank ) @property def ds_in(self) -> xr.Dataset: return ( xr.open_dataset(os.path.join(self.data_dir, f"{self.savepoint_name}-In.nc")) - .isel(rank=self.rank) + .isel(rank=self.grid.rank) .isel(savepoint=self.i_call) ) @@ -68,6 +68,6 @@ def ds_out(self) -> xr.Dataset: xr.open_dataset( os.path.join(self.data_dir, f"{self.savepoint_name}-Out.nc") ) - .isel(rank=self.rank) + 
.isel(rank=self.grid.rank) .isel(savepoint=self.i_call) ) diff --git a/ndsl/stencils/testing/test_translate.py b/ndsl/stencils/testing/test_translate.py index 70480c16..5851a636 100644 --- a/ndsl/stencils/testing/test_translate.py +++ b/ndsl/stencils/testing/test_translate.py @@ -178,7 +178,7 @@ def test_sequential_savepoint( if case.testobj.skip_test: return if not case.exists: - pytest.skip(f"Data at rank {case.rank} does not exists") + pytest.skip(f"Data at rank {case.grid.rank} does not exists") input_data = dataset_to_dict(case.ds_in) input_names = ( case.testobj.serialnames(case.testobj.in_vars["data_vars"]) @@ -218,6 +218,7 @@ def test_sequential_savepoint( absolute_eps_override=case.testobj.mmr_absolute_eps, relative_fraction_override=case.testobj.mmr_relative_fraction, ulp_override=case.testobj.mmr_ulp, + sort_report=case.sort_report, ) else: metric = LegacyMetric( @@ -234,7 +235,7 @@ def test_sequential_savepoint( ref_data_out[varname] = [ref_data] # Reporting & data save - _report_results(case.savepoint_name, results) + _report_results(case.savepoint_name, case.grid.rank, results) if len(failing_names) > 0: get_thresholds(case.testobj, input_data=original_input_data) os.makedirs(OUTDIR, exist_ok=True) @@ -341,7 +342,7 @@ def test_parallel_savepoint( if (grid == "compute") and not case.testobj.compute_grid_option: pytest.xfail(f"Grid compute option not used for test {case.savepoint_name}") if not case.exists: - pytest.skip(f"Data at rank {case.rank} does not exists") + pytest.skip(f"Data at rank {case.grid.rank} does not exists") input_data = dataset_to_dict(case.ds_in) # run python version of functionality output = case.testobj.compute_parallel(input_data, communicator) @@ -368,9 +369,12 @@ def test_parallel_savepoint( metric = MultiModalFloatMetric( reference_values=ref_data[varname][0], computed_values=output_data, - eps=case.testobj.max_error, + absolute_eps_override=case.testobj.mmr_absolute_eps, + 
relative_fraction_override=case.testobj.mmr_relative_fraction, + ulp_override=case.testobj.mmr_ulp, ignore_near_zero_errors=ignore_near_zero, near_zero=case.testobj.near_zero, + sort_report=case.sort_report, ) else: metric = LegacyMetric( @@ -386,7 +390,7 @@ def test_parallel_savepoint( passing_names.append(failing_names.pop()) # Reporting & data save - _report_results(case.savepoint_name, results) + _report_results(case.savepoint_name, case.grid.rank, results) if len(failing_names) > 0: os.makedirs(OUTDIR, exist_ok=True) nct_filename = os.path.join( @@ -414,17 +418,23 @@ def test_parallel_savepoint( pytest.fail("No tests passed") -def _report_results(savepoint_name: str, results: Dict[str, BaseMetric]) -> None: +def _report_results( + savepoint_name: str, + rank: int, + results: Dict[str, BaseMetric], +) -> None: os.makedirs(OUTDIR, exist_ok=True) # Summary - with open(f"{OUTDIR}/summary-{savepoint_name}.log", "w") as f: + with open(f"{OUTDIR}/summary-{savepoint_name}-{rank}.log", "w") as f: for varname, metric in results.items(): f.write(f"{varname}: {metric.one_line_report()}\n") # Detailed log for varname, metric in results.items(): - log_filename = os.path.join(OUTDIR, f"details-{savepoint_name}-{varname}.log") + log_filename = os.path.join( + OUTDIR, f"details-{savepoint_name}-{varname}-{rank}.log" + ) metric.report(log_filename) @@ -434,16 +444,20 @@ def save_netcdf( inputs_list: List[Dict[str, List[np.ndarray]]], output_list: List[Dict[str, List[np.ndarray]]], ref_data: Dict[str, List[np.ndarray]], - failing_names, + failing_names: List[str], out_filename, ): import xarray as xr data_vars = {} - for i, varname in enumerate(failing_names): + indices = np.argsort(failing_names) + for index in indices: + varname = failing_names[index] # Read in dimensions and attributes if hasattr(testobj, "outputs"): - dims = [dim_name + f"_{i}" for dim_name in testobj.outputs[varname]["dims"]] + dims = [ + dim_name + f"_{index}" for dim_name in 
testobj.outputs[varname]["dims"] + ] attrs = {"units": testobj.outputs[varname]["units"]} else: dims = [ diff --git a/ndsl/stencils/testing/translate.py b/ndsl/stencils/testing/translate.py index e3fc8845..7aae7922 100644 --- a/ndsl/stencils/testing/translate.py +++ b/ndsl/stencils/testing/translate.py @@ -51,6 +51,12 @@ def _convert(value: Union[Quantity, np.ndarray]) -> np.ndarray: class TranslateFortranData2Py: + """Translate test main class + + The translate test will test a set of inputs and outputs, after having processed + the inputs via the user-provided `compute_func`. + """ + max_error = 1e-14 near_zero = 1e-18 mmr_absolute_eps = -1 @@ -73,6 +79,8 @@ def setup(self, inputs): self.make_storage_data_input_vars(inputs) def compute_func(self, **inputs): + """Compute function to transform the dictionary of `inputs`. + Must return a dictionary of updated variables""" raise NotImplementedError("Implement a child class compute method") def compute(self, inputs): @@ -81,6 +89,10 @@ def compute(self, inputs): # assume inputs already has been turned into gt4py storages (or Quantities) def compute_from_storage(self, inputs): + """Run `compute_func` and return an updated `inputs` dictionary with + the returned results of `compute_func`. + + Hypothesis: `inputs` are `gt4py.storages`""" outputs = self.compute_func(**inputs) if outputs is not None: inputs.update(outputs) @@ -109,6 +121,10 @@ def make_storage_data( read_only: bool = False, full_shape: bool = False, ) -> Dict[str, "Field"]: + """Copy input data into a gt4py.storage with given shape. + + `array` is copied. Takes care of the device upload if necessary. 
+ """ use_shape = list(self.maxshape) if dummy_axes: for axis in dummy_axes: @@ -168,6 +184,8 @@ def collect_start_indices(self, datashape, varinfo): return istart, jstart, kstart def make_storage_data_input_vars(self, inputs, storage_vars=None, dict_4d=True): + """From a set of raw inputs, use the `in_vars` dictionary to update inputs to + their configured shape.""" inputs_in = {**inputs} inputs_out = {} if storage_vars is None: diff --git a/ndsl/testing/comparison.py b/ndsl/testing/comparison.py index 5bce02dd..c59f5eab 100644 --- a/ndsl/testing/comparison.py +++ b/ndsl/testing/comparison.py @@ -1,9 +1,25 @@ -from typing import List, Optional, Union +from typing import Any, List, Optional, Union import numpy as np import numpy.typing as npt +def _fixed_width_float_16e(value: np.floating[Any]) -> str: + """Format as `.16e`, padding unsigned values with a space to match the width of '-' ones""" + if not np.signbit(value): + return f" {value:.16e}" + else: + return f"{value:.16e}" + + +def _fixed_width_float_2e(value: np.floating[Any]) -> str: + """Format as `.2e`, padding unsigned values with a space to match the width of '-' ones""" + if not np.signbit(value): + return f" {value:.2e}" + else: + return f"{value:.2e}" + + class BaseMetric: def __init__( self, @@ -210,6 +226,7 @@ def __init__( absolute_eps_override: float = -1, relative_fraction_override: float = -1, ulp_override: float = -1, + sort_report: str = "ulp", **kwargs, ): super().__init__(reference_values, computed_values) @@ -235,6 +252,7 @@ def __init__( self.success = self._compute_all_metrics() self.check = np.all(self.success) + self.sort_report = sort_report def _compute_all_metrics( self, @@ -290,9 +308,9 @@ def _has_override(self) -> bool: ) def one_line_report(self) -> str: - metric_thresholds = f"{'🔶 ' if not self.absolute_eps.is_default else '' }Absolute E(<{self.absolute_eps.value:.2e}) " - metric_thresholds += f"{'🔶 ' if not self.relative_fraction.is_default else '' }Relative E(<{self.relative_fraction.value * 100:.2e}%) " - metric_thresholds += f"{'🔶 ' if not self.ulp_threshold.is_default else '' }ULP 
E(<{self.ulp_threshold.value})" + metric_thresholds = f"{'🔶 ' if not self.absolute_eps.is_default else ''}Absolute E(<{self.absolute_eps.value:.2e}) " + metric_thresholds += f"{'🔶 ' if not self.relative_fraction.is_default else ''}Relative E(<{self.relative_fraction.value * 100:.2e}%) " + metric_thresholds += f"{'🔶 ' if not self.ulp_threshold.is_default else ''}ULP E(<{self.ulp_threshold.value})" if self.check and self._has_override(): return f"🔶 No numerical differences with threshold override - metric: {metric_thresholds}" elif self.check: @@ -305,33 +323,51 @@ def one_line_report(self) -> str: def report(self, file_path: Optional[str] = None) -> List[str]: report = [] report.append(self.one_line_report()) - if not self.check: - found_indices = np.logical_not(self.success).nonzero() - # List all errors to terminal and file - bad_indices_count = len(found_indices[0]) - full_count = len(self.references.flatten()) - failures_pct = round(100.0 * (bad_indices_count / full_count), 2) - report = [ - f"All failures ({bad_indices_count}/{full_count}) ({failures_pct}%),\n", - f"Index Computed Reference " - f"{'🔶 ' if not self.absolute_eps.is_default else '' }Absolute E(<{self.absolute_eps.value:.2e}) " - f"{'🔶 ' if not self.relative_fraction.is_default else '' }Relative E(<{self.relative_fraction.value * 100:.2e}%) " - f"{'🔶 ' if not self.ulp_threshold.is_default else '' }ULP E(<{self.ulp_threshold.value})", - ] - # Summary and worst result - for iBad in range(bad_indices_count): - fi = tuple([f[iBad] for f in found_indices]) - ulp_dist = ( - self.ulp_distance[fi] - if np.isnan(self.ulp_distance[fi]) - else int(self.ulp_distance[fi]) - ) - report.append( - f"{str(fi)} {self.computed[fi]:.16e} {self.references[fi]:.16e} " - f"{self.absolute_distance[fi]:.2e} {'✅' if self.absolute_distance_metric[fi] else '❌'} " - f"{self.relative_distance[fi] * 100:.2e} {'✅' if self.relative_distance_metric[fi] else '❌'} " - f"{ulp_dist:02} {'✅' if self.ulp_distance_metric[fi] else '❌'} " - 
) + failed_indices = np.logical_not(self.success).nonzero() + # List all errors to terminal and file + bad_indices_count = len(failed_indices[0]) + full_count = len(self.references.flatten()) + failures_pct = round(100.0 * (bad_indices_count / full_count), 2) + report = [ + f"All failures ({bad_indices_count}/{full_count}) ({failures_pct}%),\n", + f"Index Computed Reference " + f"{'🔶 ' if not self.absolute_eps.is_default else ''}Absolute E(<{self.absolute_eps.value:.2e}) " + f"{'🔶 ' if not self.relative_fraction.is_default else ''}Relative E(<{self.relative_fraction.value * 100:.2e}%) " + f"{'🔶 ' if not self.ulp_threshold.is_default else ''}ULP E(<{self.ulp_threshold.value})", + ] + # Pick the row order from `sort_report`; the loop below walks it reversed + if self.sort_report == "ulp": + indices_flatten = np.argsort(self.ulp_distance.flatten()) + elif self.sort_report == "absolute": + indices_flatten = np.argsort(self.absolute_distance.flatten()) + elif self.sort_report == "relative": + indices_flatten = np.argsort(self.relative_distance.flatten()) + elif self.sort_report == "index": + indices_flatten = list(range(self.ulp_distance.size - 1, -1, -1)) + else: + raise RuntimeError( + f"[Translate test] Unknown {self.sort_report} report sorting option." 
+ ) + for iFlat in indices_flatten[::-1]: + fi = np.unravel_index(iFlat, shape=self.ulp_distance.shape) + ulp_dist = ( + self.ulp_distance[fi] + if np.isnan(self.ulp_distance[fi]) + else int(self.ulp_distance[fi]) + ) + index_as_string = "(" + for i in fi: + index_as_string += f"{i:02}," + index_as_string = index_as_string[:-1] + index_as_string += ")" + report.append( + f"{index_as_string} " + f"{_fixed_width_float_16e(self.computed[fi])} " + f"{_fixed_width_float_16e(self.references[fi])} " + f"{_fixed_width_float_2e(self.absolute_distance[fi])} {'✅' if self.absolute_distance_metric[fi] else '❌'} " + f"{_fixed_width_float_2e(self.relative_distance[fi] * 100)} {'✅' if self.relative_distance_metric[fi] else '❌'} " + f"{ulp_dist:02} {'✅' if self.ulp_distance_metric[fi] else '❌'} " + ) if file_path: with open(file_path, "w") as fd: