Skip to content

Commit

Permalink
Add support for dependencies and building run configurations only once
Browse files: browse the repository at this point in the history
  • Loading branch information
EdmundGoodman committed Feb 25, 2024
1 parent 15c31ed commit 4807044
Show file tree
Hide file tree
Showing 2 changed files with 42 additions and 14 deletions.
15 changes: 12 additions & 3 deletions src/hpc_multibench/run_configuration.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@ def __init__(
self.environment_variables: dict[str, str] = {}
self.directory: Path | None = None
self.build_commands: list[str] = []
self.pre_built: bool = False
self.run_command: str = run_command
self.args: str | None = None
self.instantiation: dict[str, Any] | None = None
Expand Down Expand Up @@ -66,7 +67,11 @@ def sbatch_contents(self) -> str:
sbatch_file += "\necho '===== BUILD ====='\n"
if self.directory is not None:
sbatch_file += f"cd {self.directory}\n"
sbatch_file += "\n".join(self.build_commands) + "\n"
if self.pre_built:
sbatch_file += "echo 'run configuration was pre-built'\n"
else:
sbatch_file += "\n".join(self.build_commands) + "\n"
sbatch_file += "echo\n"

sbatch_file += "\necho '===== RUN ====='\n"
sbatch_file += f"time -p {self.run_command} {self.args}\n"
Expand Down Expand Up @@ -96,7 +101,7 @@ def get_true_output_file_name(self, slurm_id: int) -> str:
"""Get the actual output file name with substituted slurm job id."""
return f"{self.output_file.name[:-8]}__{slurm_id}.out"

def run(self) -> int | None:
def run(self, dependencies: list[int] | None = None) -> int | None:
"""Run the specified run configuration."""
# Ensure the output directory exists before it is used
self.output_file.parent.mkdir(parents=True, exist_ok=True)
Expand All @@ -107,8 +112,12 @@ def run(self) -> int | None:
) as sbatch_tmp:
sbatch_tmp.write(self.sbatch_contents)
sbatch_tmp.flush()
command_list = ["sbatch", Path(sbatch_tmp.name)]
if dependencies is not None:
dependencies_string = ",".join(str(job_id) for job_id in dependencies)
command_list.insert(1, f"--dependency=afterok:{dependencies_string}")
result = subprocess_run( # nosec
["sbatch", Path(sbatch_tmp.name)], # noqa: S603, S607
command_list, # noqa: S603, S607
check=True,
stdout=PIPE,
)
Expand Down
41 changes: 30 additions & 11 deletions src/hpc_multibench/test_bench.py
Original file line number Diff line number Diff line change
Expand Up @@ -151,24 +151,43 @@ def record(self, args: Namespace) -> None:
if not args.no_clobber and self.output_directory.exists():
rmtree(self.output_directory)

# Realise run configurations from list of instantiations
run_configurations: list[RunConfiguration] = [
run_model.realise(run_name, self.output_directory, instantiation)
for instantiation in self.instantiations
# Realise run configurations from list of instantiations, split up
# by model so they only get built once
realised_run_configurations: dict[str, list[RunConfiguration]] = {
run_name: [
run_model.realise(run_name, self.output_directory, instantiation)
for instantiation in self.instantiations
]
for run_name, run_model in self.run_configuration_models.items()
]
}

# Optionally dry run then stop before actually running
if args.dry_run:
for run_configuration in run_configurations:
print(run_configuration, end="\n\n")
# TODO: Could be closer inside the running logic
for run_configurations in realised_run_configurations.values():
first_flag: bool = True
for run_configuration in run_configurations:
if first_flag:
first_flag = False
else:
run_configuration.pre_built = True
print(run_configuration, end="\n\n")
return

# Run all run configurations and store their slurm job ids
run_configuration_job_ids: dict[RunConfiguration, int | None] = {
run_configuration: run_configuration.run()
for run_configuration in run_configurations
}
run_configuration_job_ids: dict[RunConfiguration, int | None] = {}
for run_configurations in realised_run_configurations.values():
# Add dependencies on the first job of that run configuration, so
# you only need to build it once!
first_job_id: int | None = None
for run_configuration in run_configurations:
if first_job_id is None:
job_id = run_configuration.run()
first_job_id = job_id
else:
run_configuration.pre_built = True
job_id = run_configuration.run(dependencies=[first_job_id])
run_configuration_job_ids[run_configuration] = job_id

# Store slurm job id mappings, excluding ones which failed to launch
self.run_configurations_metadata = [
Expand Down

0 comments on commit 4807044

Please sign in to comment.