Skip to content

Commit

Permalink
Add pickling to store metadata between program runs
Browse files Browse the repository at this point in the history
  • Loading branch information
EdmundGoodman committed Feb 24, 2024
1 parent e8c3b18 commit de3ef1a
Show file tree
Hide file tree
Showing 5 changed files with 63 additions and 17 deletions.
1 change: 1 addition & 0 deletions src/hpc_multibench/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,3 +25,4 @@ def main() -> None: # pragma: no cover

test_plan = TestPlan(args.yaml_path)
test_plan.record_all()
test_plan.report_all()
10 changes: 6 additions & 4 deletions src/hpc_multibench/run_configuration.py
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,7 @@ def sbatch_contents(self) -> str:
sbatch_file += "scontrol show job $SLURM_JOB_ID\n"
if self.instantiation is not None:
sbatch_file += "echo '=== RUN INSTANTIATION ==='\n"
sbatch_file += f"echo '{self.instantiation!s}'\n"
sbatch_file += f"echo '{self.instantiation}'\n"
sbatch_file += "echo\n"

sbatch_file += "\necho '===== BUILD ====='\n"
Expand Down Expand Up @@ -92,6 +92,10 @@ def get_instantiation_repr(cls, instantiation: dict[str, Any]) -> str:
for name, value in instantiation.items()
)

def get_true_output_file_name(self, slurm_id: int) -> str:
    """Build the output file name with the slurm job id filled in.

    The last eight characters of the templated output file name are
    dropped, then `__<slurm_id>.out` is appended in their place.
    """
    # NOTE(review): the eight trimmed characters are presumably the
    # `__%j.out` placeholder suffix -- confirm where `output_file` is set.
    templated_name = self.output_file.name
    base_name = templated_name[:-8]
    return f"{base_name}__{slurm_id}.out"

def run(self) -> int | None:
"""Run the specified run configuration."""
# Ensure the output directory exists before it is used
Expand Down Expand Up @@ -125,9 +129,7 @@ def collect(self, slurm_id: int) -> str | None:
return None

# Return the contents of the specified output file
output_file = (
self.output_file.parent / f"{self.output_file.name[:-8]}__{slurm_id}.out"
)
output_file = self.output_file.parent / self.get_true_output_file_name(slurm_id)
if not output_file.exists():
return None
return output_file.read_text(encoding="utf-8")
Expand Down
66 changes: 54 additions & 12 deletions src/hpc_multibench/test_bench.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,19 +2,29 @@
# -*- coding: utf-8 -*-
"""A class representing a test bench composing part of a test plan."""

from dataclasses import dataclass
from itertools import product
from pathlib import Path
from pickle import dump as pickle_dump # nosec
from pickle import load as pickle_load # nosec
from shutil import rmtree
from typing import TYPE_CHECKING, Any
from typing import Any

from hpc_multibench.yaml_model import BenchModel, RunConfigurationModel

if TYPE_CHECKING:
from hpc_multibench.run_configuration import RunConfiguration

BASE_OUTPUT_DIRECTORY = Path("results/")


@dataclass
class RunConfigurationMetadata:
    """Record describing one run configuration, kept between program runs."""

    # Slurm job id associated with the run configuration
    job_id: int
    # Name of the run configuration
    name: str
    # Name of the output file for this job
    output_file_name: str
    # Instantiation of the run configuration, or None
    instantiation: dict[str, Any] | None


class TestBench:
"""A test bench composing part of a test plan."""

Expand All @@ -28,7 +38,6 @@ def __init__(
self.name = name
self.run_configuration_models = run_configuration_models
self.bench_model = bench_model
self.run_configurations: list[RunConfiguration] | None = None

# Validate that all configurations named in the test bench are defined
# in the test plan
Expand Down Expand Up @@ -63,6 +72,28 @@ def instantiations(self) -> list[dict[str, Any]]:
for combination in product(*shaped)
]

@property
def _run_configurations_metadata_file(self) -> Path:
    """Path of the pickle file storing the run configuration metadata."""
    metadata_file_name = "run_configs.pickle"
    return self.output_directory.joinpath(metadata_file_name)

@property
def run_configurations_metadata(self) -> list[RunConfigurationMetadata]:
    """Load the run configuration metadata from its file.

    Returns:
        The unpickled list of metadata entries, or an empty list when no
        metadata file exists (nothing has been recorded yet).
    """
    metadata_path = self._run_configurations_metadata_file
    # TODO: Could store in human-readable format, pickling only instantiations
    try:
        metadata_file = metadata_path.open("rb")
    except FileNotFoundError:
        # Nothing recorded yet, so there is no metadata to report on
        return []
    with metadata_file:
        # SECURITY: unpickling can execute arbitrary code, so this file must
        # only ever be one written by this program into a trusted directory.
        return pickle_load(metadata_file)  # type: ignore # noqa: PGH003, S301 # nosec

@run_configurations_metadata.setter
def run_configurations_metadata(
    self, metadata: list[RunConfigurationMetadata]
) -> None:
    """Write the run configuration metadata out to its file.

    Args:
        metadata: The metadata entries to pickle, replacing any previously
            stored contents.
    """
    metadata_path = self._run_configurations_metadata_file
    # The containing directory may not exist yet, so create it before writing
    metadata_path.parent.mkdir(parents=True, exist_ok=True)
    # Write-only access is sufficient; the file is never read back here
    with metadata_path.open("wb") as metadata_file:
        pickle_dump(metadata, metadata_file)

def record(
self,
clobber: bool = False, # noqa: FBT001, FBT002
Expand All @@ -77,24 +108,34 @@ def record(
rmtree(self.output_directory)

# Realise run configurations from list of instantiations
self.run_configurations = [
run_configurations = [
run_model.realise(run_name, self.output_directory, instantiation)
for instantiation in self.instantiations
for run_name, run_model in self.run_configuration_models.items()
]

# Optionally dry run then return
# Optionally dry run then stop before actually running
if dry_run:
for run_configuration in self.run_configurations:
for run_configuration in run_configurations:
print(run_configuration, end="\n\n")
return

# Run all run configurations
for run_configuration in self.run_configurations:
# TODO: Need to store slurm job id mappings
run_configuration.run()
# Run all run configurations and store their slurm job ids
run_configuration_job_ids = {
run_configuration: 12345 # run_configuration.run()
for run_configuration in run_configurations
}

# Store slurm job id mappings
self.run_configurations_metadata = [
RunConfigurationMetadata(
job_id,
run_configuration.name,
run_configuration.get_true_output_file_name(job_id),
run_configuration.instantiation,
)
for run_configuration, job_id in run_configuration_job_ids.items()
]

# TODO: Optionally wait for all run configurations to dequeue/terminate
if not no_wait:
Expand All @@ -107,6 +148,7 @@ def report(self) -> None:
f"x: {self.bench_model.analysis.plot.x}, "
f"y: {self.bench_model.analysis.plot.y}"
)
print("\n".join(str(x) for x in self.run_configurations_metadata))
# Load mappings from run config/args to slurm job ids
# Collect outputs of all slurm job ids
# Print outputs/do analysis
2 changes: 1 addition & 1 deletion src/hpc_multibench/test_plan.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ def record_all(self) -> None:
"""Run all the enabled test benches in the plan."""
for bench in self.benches:
if bench.bench_model.enabled:
bench.record(dry_run=True)
bench.record(dry_run=False)

def report_all(self) -> None:
"""Analyse all the enabled test benches in the plan."""
Expand Down
1 change: 1 addition & 0 deletions src/hpc_multibench/yaml_model.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@ def realise(
run.directory = Path(self.directory)
run.build_commands = self.build_commands
run.args = self.args
run.instantiation = instantiation

# Update the run configuration based on the instantiation
for key, value in instantiation.items():
Expand Down

0 comments on commit de3ef1a

Please sign in to comment.