Skip to content
Merged
Show file tree
Hide file tree
Changes from 7 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
17 changes: 17 additions & 0 deletions ndsl/stencils/testing/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -91,6 +91,12 @@ def pytest_addoption(parser):
default=False,
help="Use the multi-modal float metric. Default to False.",
)
parser.addoption(
"--sort_report",
action="store",
default="ulp",
help='Sort the report by "index" (ascending) or along the metric: "ulp", "absolute", "relative" (descending). Default to "ulp"',
)


def pytest_configure(config):
Expand Down Expand Up @@ -237,6 +243,7 @@ def sequential_savepoint_cases(metafunc, data_path, namelist_filename, *, backen
savepoint_to_replay = get_savepoint_restriction(metafunc)
grid_mode = metafunc.config.getoption("grid")
topology_mode = metafunc.config.getoption("topology")
sort_report = metafunc.config.getoption("sort_report")
return _savepoint_cases(
savepoint_names,
ranks,
Expand All @@ -247,6 +254,7 @@ def sequential_savepoint_cases(metafunc, data_path, namelist_filename, *, backen
data_path,
grid_mode,
topology_mode,
sort_report=sort_report,
)


Expand All @@ -260,6 +268,7 @@ def _savepoint_cases(
data_path: str,
grid_mode: str,
topology_mode: bool,
sort_report: str,
):
return_list = []
for rank in ranks:
Expand Down Expand Up @@ -313,6 +322,7 @@ def _savepoint_cases(
i_call=i_call,
testobj=testobj,
grid=grid,
sort_report=sort_report,
)
)
return return_list
Expand All @@ -333,6 +343,7 @@ def parallel_savepoint_cases(
):
namelist = get_namelist(namelist_filename)
topology_mode = metafunc.config.getoption("topology")
sort_report = metafunc.config.getoption("sort_report")
communicator = get_communicator(comm, namelist.layout, topology_mode)
stencil_config = get_config(backend, communicator)
savepoint_names = get_parallel_savepoint_names(metafunc, data_path)
Expand All @@ -348,6 +359,7 @@ def parallel_savepoint_cases(
data_path,
grid_mode,
topology_mode,
sort_report=sort_report,
)


Expand Down Expand Up @@ -416,6 +428,11 @@ def multimodal_metric(pytestconfig):
return bool(pytestconfig.getoption("multimodal_metric"))


@pytest.fixture()
def sort_report(pytestconfig):
    """Return the value of the ``--sort_report`` command-line option.

    The option is registered as ``--sort_report``, so it must be looked up
    under that name; asking pytest for an unregistered name raises
    ``ValueError``.
    """
    return pytestconfig.getoption("sort_report")


@pytest.fixture()
def grid(pytestconfig):
    """Expose the value of the ``grid`` command-line option as a fixture."""
    grid_option = pytestconfig.getoption("grid")
    return grid_option
Expand Down
3 changes: 3 additions & 0 deletions ndsl/stencils/testing/parallel_translate.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,9 @@
class ParallelTranslate:
max_error = TranslateFortranData2Py.max_error
near_zero = TranslateFortranData2Py.near_zero
mmr_absolute_eps = TranslateFortranData2Py.mmr_absolute_eps
mmr_relative_fraction = TranslateFortranData2Py.mmr_relative_fraction
mmr_ulp = TranslateFortranData2Py.mmr_ulp
compute_grid_option = False
tests_grid = False
inputs: Dict[str, Any] = {}
Expand Down
1 change: 1 addition & 0 deletions ndsl/stencils/testing/savepoint.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,7 @@ class SavepointCase:
i_call: int
testobj: Translate
grid: Grid
sort_report: str

def __str__(self):
return f"{self.savepoint_name}-rank={self.rank}-call={self.i_call}"
Expand Down
32 changes: 23 additions & 9 deletions ndsl/stencils/testing/test_translate.py
Original file line number Diff line number Diff line change
Expand Up @@ -218,6 +218,7 @@ def test_sequential_savepoint(
absolute_eps_override=case.testobj.mmr_absolute_eps,
relative_fraction_override=case.testobj.mmr_relative_fraction,
ulp_override=case.testobj.mmr_ulp,
sort_report=case.sort_report,
)
else:
metric = LegacyMetric(
Expand All @@ -234,7 +235,7 @@ def test_sequential_savepoint(
ref_data_out[varname] = [ref_data]

# Reporting & data save
_report_results(case.savepoint_name, results)
_report_results(case.savepoint_name, case.rank, results)
if len(failing_names) > 0:
get_thresholds(case.testobj, input_data=original_input_data)
os.makedirs(OUTDIR, exist_ok=True)
Expand Down Expand Up @@ -368,9 +369,12 @@ def test_parallel_savepoint(
metric = MultiModalFloatMetric(
reference_values=ref_data[varname][0],
computed_values=output_data,
eps=case.testobj.max_error,
absolute_eps_override=case.testobj.mmr_absolute_eps,
relative_fraction_override=case.testobj.mmr_relative_fraction,
ulp_override=case.testobj.mmr_ulp,
ignore_near_zero_errors=ignore_near_zero,
near_zero=case.testobj.near_zero,
sort_report=case.sort_report,
)
else:
metric = LegacyMetric(
Expand All @@ -386,7 +390,7 @@ def test_parallel_savepoint(
passing_names.append(failing_names.pop())

# Reporting & data save
_report_results(case.savepoint_name, results)
_report_results(case.savepoint_name, case.rank, results)
Comment thread
FlorianDeconinck marked this conversation as resolved.
Outdated
if len(failing_names) > 0:
os.makedirs(OUTDIR, exist_ok=True)
nct_filename = os.path.join(
Expand Down Expand Up @@ -414,17 +418,23 @@ def test_parallel_savepoint(
pytest.fail("No tests passed")


def _report_results(
    savepoint_name: str,
    rank: int,
    results: Dict[str, BaseMetric],
) -> None:
    """Write the summary log and the per-variable detail logs of a savepoint.

    Args:
        savepoint_name: name of the savepoint; part of every log file name.
        rank: rank the results belong to; part of every log file name.
        results: metric computed for each variable, keyed by variable name.
    """
    os.makedirs(OUTDIR, exist_ok=True)

    # Summary: one line per variable. The one-line reports contain non-ASCII
    # status glyphs, so the encoding is pinned rather than left to the locale.
    summary_filename = os.path.join(OUTDIR, f"summary-{savepoint_name}-{rank}.log")
    with open(summary_filename, "w", encoding="utf-8") as f:
        for varname, metric in results.items():
            f.write(f"{varname}: {metric.one_line_report()}\n")

    # Detailed log: one file per variable, written by the metric itself
    for varname, metric in results.items():
        log_filename = os.path.join(
            OUTDIR, f"details-{savepoint_name}-{varname}-{rank}.log"
        )
        metric.report(log_filename)


Expand All @@ -434,16 +444,20 @@ def save_netcdf(
inputs_list: List[Dict[str, List[np.ndarray]]],
output_list: List[Dict[str, List[np.ndarray]]],
ref_data: Dict[str, List[np.ndarray]],
failing_names,
failing_names: List[str],
Comment thread
FlorianDeconinck marked this conversation as resolved.
out_filename,
):
import xarray as xr

data_vars = {}
for i, varname in enumerate(failing_names):
indices = np.argsort(failing_names)
for index in indices:
varname = failing_names[index]
# Read in dimensions and attributes
if hasattr(testobj, "outputs"):
dims = [dim_name + f"_{i}" for dim_name in testobj.outputs[varname]["dims"]]
dims = [
dim_name + f"_{index}" for dim_name in testobj.outputs[varname]["dims"]
]
attrs = {"units": testobj.outputs[varname]["units"]}
else:
dims = [
Expand Down
98 changes: 67 additions & 31 deletions ndsl/testing/comparison.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,25 @@
from typing import List, Optional, Union
from typing import Any, List, Optional, Union

import numpy as np
import numpy.typing as npt


def _fixed_width_float_16e(value: np.floating[Any]) -> str:
"""Account for extra '-' character"""
if value > 0:
return f" {value:.16e}"
else:
return f"{value:.16e}"


def _fixed_width_float_2e(value: np.floating[Any]) -> str:
"""Account for extra '-' character"""
if value > 0:
return f" {value:.2e}"
else:
return f"{value:.2e}"


class BaseMetric:
def __init__(
self,
Expand Down Expand Up @@ -210,6 +226,7 @@ def __init__(
absolute_eps_override: float = -1,
relative_fraction_override: float = -1,
ulp_override: float = -1,
sort_report: str = "ulp",
**kwargs,
):
super().__init__(reference_values, computed_values)
Expand All @@ -235,6 +252,7 @@ def __init__(

self.success = self._compute_all_metrics()
self.check = np.all(self.success)
self.sort_report = sort_report

def _compute_all_metrics(
self,
Expand Down Expand Up @@ -290,9 +308,9 @@ def _has_override(self) -> bool:
)

def one_line_report(self) -> str:
metric_thresholds = f"{'🔶 ' if not self.absolute_eps.is_default else '' }Absolute E(<{self.absolute_eps.value:.2e}) "
metric_thresholds += f"{'🔶 ' if not self.relative_fraction.is_default else '' }Relative E(<{self.relative_fraction.value * 100:.2e}%) "
metric_thresholds += f"{'🔶 ' if not self.ulp_threshold.is_default else '' }ULP E(<{self.ulp_threshold.value})"
metric_thresholds = f"{'🔶 ' if not self.absolute_eps.is_default else ''}Absolute E(<{self.absolute_eps.value:.2e}) "
metric_thresholds += f"{'🔶 ' if not self.relative_fraction.is_default else ''}Relative E(<{self.relative_fraction.value * 100:.2e}%) "
metric_thresholds += f"{'🔶 ' if not self.ulp_threshold.is_default else ''}ULP E(<{self.ulp_threshold.value})"
if self.check and self._has_override():
return f"🔶 No numerical differences with threshold override - metric: {metric_thresholds}"
elif self.check:
Expand All @@ -305,33 +323,51 @@ def one_line_report(self) -> str:
def report(self, file_path: Optional[str] = None) -> List[str]:
report = []
report.append(self.one_line_report())
if not self.check:
Comment thread
FlorianDeconinck marked this conversation as resolved.
found_indices = np.logical_not(self.success).nonzero()
# List all errors to terminal and file
bad_indices_count = len(found_indices[0])
full_count = len(self.references.flatten())
failures_pct = round(100.0 * (bad_indices_count / full_count), 2)
report = [
f"All failures ({bad_indices_count}/{full_count}) ({failures_pct}%),\n",
f"Index Computed Reference "
f"{'🔶 ' if not self.absolute_eps.is_default else '' }Absolute E(<{self.absolute_eps.value:.2e}) "
f"{'🔶 ' if not self.relative_fraction.is_default else '' }Relative E(<{self.relative_fraction.value * 100:.2e}%) "
f"{'🔶 ' if not self.ulp_threshold.is_default else '' }ULP E(<{self.ulp_threshold.value})",
]
# Summary and worst result
for iBad in range(bad_indices_count):
fi = tuple([f[iBad] for f in found_indices])
ulp_dist = (
self.ulp_distance[fi]
if np.isnan(self.ulp_distance[fi])
else int(self.ulp_distance[fi])
)
report.append(
f"{str(fi)} {self.computed[fi]:.16e} {self.references[fi]:.16e} "
f"{self.absolute_distance[fi]:.2e} {'✅' if self.absolute_distance_metric[fi] else '❌'} "
f"{self.relative_distance[fi] * 100:.2e} {'✅' if self.relative_distance_metric[fi] else '❌'} "
f"{ulp_dist:02} {'✅' if self.ulp_distance_metric[fi] else '❌'} "
)
failed_indices = np.logical_not(self.success).nonzero()
# List all errors to terminal and file
bad_indices_count = len(failed_indices[0])
full_count = len(self.references.flatten())
failures_pct = round(100.0 * (bad_indices_count / full_count), 2)
report = [
f"All failures ({bad_indices_count}/{full_count}) ({failures_pct}%),\n",
f"Index Computed Reference "
f"{'🔶 ' if not self.absolute_eps.is_default else ''}Absolute E(<{self.absolute_eps.value:.2e}) "
f"{'🔶 ' if not self.relative_fraction.is_default else ''}Relative E(<{self.relative_fraction.value * 100:.2e}%) "
f"{'🔶 ' if not self.ulp_threshold.is_default else ''}ULP E(<{self.ulp_threshold.value})",
]
# Summary and worst result
# Select the listing order. Indices are built in ascending order of the
# chosen key because the reporting loop walks them in reverse.
if self.sort_report == "ulp":
    indices_flatten = np.argsort(self.ulp_distance.flatten())
elif self.sort_report == "absolute":
    indices_flatten = np.argsort(self.absolute_distance.flatten())
elif self.sort_report == "relative":
    indices_flatten = np.argsort(self.relative_distance.flatten())
elif self.sort_report == "index":
    # Stored descending so that the reversed walk yields ascending indices.
    indices_flatten = list(range(self.ulp_distance.size - 1, -1, -1))
else:
    # The exception has to be raised: only constructing it would leave
    # `indices_flatten` unbound and fail later with an unrelated error.
    raise RuntimeError(
        f"[Translate test] Unknown {self.sort_report} report sorting option."
    )
for iFlat in indices_flatten[::-1]:
fi = np.unravel_index(iFlat, shape=self.ulp_distance.shape)
ulp_dist = (
self.ulp_distance[fi]
if np.isnan(self.ulp_distance[fi])
else int(self.ulp_distance[fi])
)
index_as_string = "("
for i in fi:
index_as_string += f"{i:02},"
index_as_string = index_as_string[:-1]
index_as_string += ")"
report.append(
f"{index_as_string} "
f"{_fixed_width_float_16e(self.computed[fi])} "
f"{_fixed_width_float_16e(self.references[fi])} "
f"{_fixed_width_float_2e(self.absolute_distance[fi])} {'✅' if self.absolute_distance_metric[fi] else '❌'} "
f"{_fixed_width_float_2e(self.relative_distance[fi] * 100)} {'✅' if self.relative_distance_metric[fi] else '❌'} "
f"{ulp_dist:02} {'✅' if self.ulp_distance_metric[fi] else '❌'} "
)

if file_path:
with open(file_path, "w") as fd:
Expand Down