From de3ef1ae4334617f88130baed0455fe093111158 Mon Sep 17 00:00:00 2001 From: EdmundGoodman Date: Sat, 24 Feb 2024 03:46:41 +0000 Subject: [PATCH] Add pickling to store metadata between program runs --- src/hpc_multibench/main.py | 1 + src/hpc_multibench/run_configuration.py | 10 ++-- src/hpc_multibench/test_bench.py | 66 ++++++++++++++++++++----- src/hpc_multibench/test_plan.py | 2 +- src/hpc_multibench/yaml_model.py | 1 + 5 files changed, 63 insertions(+), 17 deletions(-) diff --git a/src/hpc_multibench/main.py b/src/hpc_multibench/main.py index 7b45239..a1a3a5f 100755 --- a/src/hpc_multibench/main.py +++ b/src/hpc_multibench/main.py @@ -25,3 +25,4 @@ def main() -> None: # pragma: no cover test_plan = TestPlan(args.yaml_path) test_plan.record_all() + test_plan.report_all() diff --git a/src/hpc_multibench/run_configuration.py b/src/hpc_multibench/run_configuration.py index 2937ced..4213be5 100755 --- a/src/hpc_multibench/run_configuration.py +++ b/src/hpc_multibench/run_configuration.py @@ -60,7 +60,7 @@ def sbatch_contents(self) -> str: sbatch_file += "scontrol show job $SLURM_JOB_ID\n" if self.instantiation is not None: sbatch_file += "echo '=== RUN INSTANTIATION ==='\n" - sbatch_file += f"echo '{self.instantiation!s}'\n" + sbatch_file += f"echo '{self.instantiation}'\n" sbatch_file += "echo\n" sbatch_file += "\necho '===== BUILD ====='\n" @@ -92,6 +92,10 @@ def get_instantiation_repr(cls, instantiation: dict[str, Any]) -> str: for name, value in instantiation.items() ) + def get_true_output_file_name(self, slurm_id: int) -> str: + """Get the actual output file name with substituted slurm job id.""" + return f"{self.output_file.name[:-8]}__{slurm_id}.out" + def run(self) -> int | None: """Run the specified run configuration.""" # Ensure the output directory exists before it is used @@ -125,9 +129,7 @@ def collect(self, slurm_id: int) -> str | None: return None # Return the contents of the specified output file - output_file = ( - self.output_file.parent / 
f"{self.output_file.name[:-8]}__{slurm_id}.out" - ) + output_file = self.output_file.parent / self.get_true_output_file_name(slurm_id) if not output_file.exists(): return None return output_file.read_text(encoding="utf-8") diff --git a/src/hpc_multibench/test_bench.py b/src/hpc_multibench/test_bench.py index 2cb61d9..5e50a9d 100755 --- a/src/hpc_multibench/test_bench.py +++ b/src/hpc_multibench/test_bench.py @@ -2,19 +2,29 @@ # -*- coding: utf-8 -*- """A class representing a test bench composing part of a test plan.""" +from dataclasses import dataclass from itertools import product from pathlib import Path +from pickle import dump as pickle_dump # nosec +from pickle import load as pickle_load # nosec from shutil import rmtree -from typing import TYPE_CHECKING, Any +from typing import Any from hpc_multibench.yaml_model import BenchModel, RunConfigurationModel -if TYPE_CHECKING: - from hpc_multibench.run_configuration import RunConfiguration - BASE_OUTPUT_DIRECTORY = Path("results/") +@dataclass +class RunConfigurationMetadata: + """Data about run configurations to persist between program instances.""" + + job_id: int + name: str + output_file_name: str + instantiation: dict[str, Any] | None + + class TestBench: """A test bench composing part of a test plan.""" @@ -28,7 +38,6 @@ def __init__( self.name = name self.run_configuration_models = run_configuration_models self.bench_model = bench_model - self.run_configurations: list[RunConfiguration] | None = None # Validate that all configurations named in the test bench are defined # in the test plan @@ -63,6 +72,28 @@ def instantiations(self) -> list[dict[str, Any]]: for combination in product(*shaped) ] + @property + def _run_configurations_metadata_file(self) -> Path: + """Get the path to the file to save the run configuration metadata.""" + return self.output_directory / "run_configs.pickle" + + @property + def run_configurations_metadata(self) -> list[RunConfigurationMetadata]: + """Retrieve the run configuration 
metadata from its file.""" + # if not self._run_configurations_metadata_file.exists(): + # pass + # TODO: Could store in human-readable format, pickling only instantiations + with self._run_configurations_metadata_file.open("rb") as metadata_file: + return pickle_load(metadata_file) # type: ignore # noqa: PGH003, S301 # nosec + + @run_configurations_metadata.setter + def run_configurations_metadata( + self, metadata: list[RunConfigurationMetadata] + ) -> None: + """Write out the run configuration metadata to its file.""" + with self._run_configurations_metadata_file.open("wb+") as metadata_file: + pickle_dump(metadata, metadata_file) + def record( self, clobber: bool = False, # noqa: FBT001, FBT002 @@ -77,24 +108,34 @@ def record( rmtree(self.output_directory) # Realise run configurations from list of instantiations - self.run_configurations = [ + run_configurations = [ run_model.realise(run_name, self.output_directory, instantiation) for instantiation in self.instantiations for run_name, run_model in self.run_configuration_models.items() ] - # Optionally dry run then return + # Optionally dry run then stop before actually running if dry_run: - for run_configuration in self.run_configurations: + for run_configuration in run_configurations: print(run_configuration, end="\n\n") return - # Run all run configurations - for run_configuration in self.run_configurations: - # TODO: Need to store slurm job id mappings - run_configuration.run() + # Run all run configurations and store their slurm job ids + run_configuration_job_ids = { + run_configuration: 12345 # run_configuration.run() + for run_configuration in run_configurations + } # Store slurm job id mappings + self.run_configurations_metadata = [ + RunConfigurationMetadata( + job_id, + run_configuration.name, + run_configuration.get_true_output_file_name(job_id), + run_configuration.instantiation, + ) + for run_configuration, job_id in run_configuration_job_ids.items() + ] # TODO: Optionally wait for all run
configurations to dequeue/terminate if not no_wait: @@ -107,6 +148,7 @@ def report(self) -> None: f"x: {self.bench_model.analysis.plot.x}, " f"y: {self.bench_model.analysis.plot.y}" ) + print("\n".join(str(x) for x in self.run_configurations_metadata)) # Load mappings from run config/args to slurm job ids # Collect outputs of all slurm job ids # Print outputs/do analysis diff --git a/src/hpc_multibench/test_plan.py b/src/hpc_multibench/test_plan.py index 1ce0833..01aa789 100755 --- a/src/hpc_multibench/test_plan.py +++ b/src/hpc_multibench/test_plan.py @@ -32,7 +32,7 @@ def record_all(self) -> None: """Run all the enabled test benches in the plan.""" for bench in self.benches: if bench.bench_model.enabled: - bench.record(dry_run=True) + bench.record(dry_run=False) def report_all(self) -> None: """Analyse all the enabled test benches in the plan.""" diff --git a/src/hpc_multibench/yaml_model.py b/src/hpc_multibench/yaml_model.py index c5ce236..60ceb33 100755 --- a/src/hpc_multibench/yaml_model.py +++ b/src/hpc_multibench/yaml_model.py @@ -37,6 +37,7 @@ def realise( run.directory = Path(self.directory) run.build_commands = self.build_commands run.args = self.args + run.instantiation = instantiation # Update the run configuration based on the instantiation for key, value in instantiation.items():