Add pickling to store metadata between program runs

EdmundGoodman · Feb 24, 2024 · de3ef1a · de3ef1a
1 parent e8c3b18
commit de3ef1a
Show file tree

Hide file tree

Showing 5 changed files with 63 additions and 17 deletions.
diff --git a/src/hpc_multibench/main.py b/src/hpc_multibench/main.py
@@ -25,3 +25,4 @@ def main() -> None:  # pragma: no cover
 
     test_plan = TestPlan(args.yaml_path)
     test_plan.record_all()
+    test_plan.report_all()
diff --git a/src/hpc_multibench/run_configuration.py b/src/hpc_multibench/run_configuration.py
@@ -60,7 +60,7 @@ def sbatch_contents(self) -> str:
         sbatch_file += "scontrol show job $SLURM_JOB_ID\n"
         if self.instantiation is not None:
             sbatch_file += "echo '=== RUN INSTANTIATION ==='\n"
-            sbatch_file += f"echo '{self.instantiation!s}'\n"
+            sbatch_file += f"echo '{self.instantiation}'\n"
         sbatch_file += "echo\n"
 
         sbatch_file += "\necho '===== BUILD ====='\n"
@@ -92,6 +92,10 @@ def get_instantiation_repr(cls, instantiation: dict[str, Any]) -> str:
             for name, value in instantiation.items()
         )
 
+    def get_true_output_file_name(self, slurm_id: int) -> str:
+        """Get the actual output file name with substituted slurm job id."""
+        return f"{self.output_file.name[:-8]}__{slurm_id}.out"
+
     def run(self) -> int | None:
         """Run the specified run configuration."""
         # Ensure the output directory exists before it is used
@@ -125,9 +129,7 @@ def collect(self, slurm_id: int) -> str | None:
             return None
 
         # Return the contents of the specified output file
-        output_file = (
-            self.output_file.parent / f"{self.output_file.name[:-8]}__{slurm_id}.out"
-        )
+        output_file = self.output_file.parent / self.get_true_output_file_name(slurm_id)
         if not output_file.exists():
             return None
         return output_file.read_text(encoding="utf-8")

diff --git a/src/hpc_multibench/test_bench.py b/src/hpc_multibench/test_bench.py
@@ -2,19 +2,29 @@
 # -*- coding: utf-8 -*-
 """A class representing a test bench composing part of a test plan."""
 
+from dataclasses import dataclass
 from itertools import product
 from pathlib import Path
+from pickle import dump as pickle_dump  # nosec
+from pickle import load as pickle_load  # nosec
 from shutil import rmtree
-from typing import TYPE_CHECKING, Any
+from typing import Any
 
 from hpc_multibench.yaml_model import BenchModel, RunConfigurationModel
 
-if TYPE_CHECKING:
-    from hpc_multibench.run_configuration import RunConfiguration
-
 BASE_OUTPUT_DIRECTORY = Path("results/")
 
 
+@dataclass
+class RunConfigurationMetadata:
+    """Data about run configurations to persist between program instances."""
+
+    job_id: int
+    name: str
+    output_file_name: str
+    instantiation: dict[str, Any] | None
+
+
 class TestBench:
     """A test bench composing part of a test plan."""
 
@@ -28,7 +38,6 @@ def __init__(
         self.name = name
         self.run_configuration_models = run_configuration_models
         self.bench_model = bench_model
-        self.run_configurations: list[RunConfiguration] | None = None
 
         # Validate that all configurations named in the test bench are defined
         # in the test plan
@@ -63,6 +72,28 @@ def instantiations(self) -> list[dict[str, Any]]:
             for combination in product(*shaped)
         ]
 
+    @property
+    def _run_configurations_metadata_file(self) -> Path:
+        """Get the path to the file to save the run configuration metadata."""
+        return self.output_directory / "run_configs.pickle"
+
+    @property
+    def run_configurations_metadata(self) -> list[RunConfigurationMetadata]:
+        """Retrieve the run configuration metadata from its file."""
+        # if not self._run_configurations_metadata_file.exists():
+        #     pass
+        # TODO: Could store in human-readable format, pickling only instantiations
+        with self._run_configurations_metadata_file.open("rb") as metadata_file:
+            return pickle_load(metadata_file)  # type: ignore # noqa: PGH003, S301 # nosec
+
+    @run_configurations_metadata.setter
+    def run_configurations_metadata(
+        self, metadata: list[RunConfigurationMetadata]
+    ) -> None:
+        """Write out the run configuration metadata to its file."""
+        with self._run_configurations_metadata_file.open("wb+") as metadata_file:
+            pickle_dump(metadata, metadata_file)
+
     def record(
         self,
         clobber: bool = False,  # noqa: FBT001, FBT002
@@ -77,24 +108,34 @@ def record(
             rmtree(self.output_directory)
 
         # Realise run configurations from list of instantiations
-        self.run_configurations = [
+        run_configurations = [
             run_model.realise(run_name, self.output_directory, instantiation)
             for instantiation in self.instantiations
             for run_name, run_model in self.run_configuration_models.items()
         ]
 
-        # Optionally dry run then return
+        # Optionally dry run then stop before actually running
         if dry_run:
-            for run_configuration in self.run_configurations:
+            for run_configuration in run_configurations:
                 print(run_configuration, end="\n\n")
             return
 
-        # Run all run configurations
-        for run_configuration in self.run_configurations:
-            # TODO: Need to store slurm job id mappings
-            run_configuration.run()
+        # Run all run configurations and store their slurm job ids
+        run_configuration_job_ids = {
+            run_configuration: 12345  # run_configuration.run()
+            for run_configuration in run_configurations
+        }
 
         # Store slurm job id mappings
+        self.run_configurations_metadata = [
+            RunConfigurationMetadata(
+                job_id,
+                run_configuration.name,
+                run_configuration.get_true_output_file_name(job_id),
+                run_configuration.instantiation,
+            )
+            for run_configuration, job_id in run_configuration_job_ids.items()
+        ]
 
         # TODO: Optionally wait for all run configurations to dequeue/terminate
         if not no_wait:
@@ -107,6 +148,7 @@ def report(self) -> None:
             f"x: {self.bench_model.analysis.plot.x}, "
             f"y: {self.bench_model.analysis.plot.y}"
         )
+        print("\n".join(str(x) for x in self.run_configurations_metadata))
         # Load mappings from run config/args to slurm job ids
         # Collect outputs of all slurm job ids
         # Print outputs/do analysis
diff --git a/src/hpc_multibench/test_plan.py b/src/hpc_multibench/test_plan.py
@@ -32,7 +32,7 @@ def record_all(self) -> None:
         """Run all the enabled test benches in the plan."""
         for bench in self.benches:
             if bench.bench_model.enabled:
-                bench.record(dry_run=True)
+                bench.record(dry_run=False)
 
     def report_all(self) -> None:
         """Analyse all the enabled test benches in the plan."""

diff --git a/src/hpc_multibench/yaml_model.py b/src/hpc_multibench/yaml_model.py
@@ -37,6 +37,7 @@ def realise(
         run.directory = Path(self.directory)
         run.build_commands = self.build_commands
         run.args = self.args
+        run.instantiation = instantiation
 
         # Update the run configuration based on the instantiation
         for key, value in instantiation.items():
Original file line number	Diff line number	Diff line change
Expand Up		@@ -25,3 +25,4 @@ def main() -> None: # pragma: no cover

		test_plan = TestPlan(args.yaml_path)
		test_plan.record_all()
		test_plan.report_all()