Commit: Get plotting working for strong/weak scaling

EdmundGoodman committed Feb 25, 2024
1 parent dd257fa commit 57016bf
Showing 6 changed files with 76 additions and 78 deletions.
16 changes: 0 additions & 16 deletions src/hpc_multibench/analysis.py
@@ -1,19 +1,3 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""A set of functions to analyse the results of a test bench run."""
-
-import matplotlib.pyplot as plt
-
-
-def line_plot(
-    data: dict[str, list[tuple[float, float]]], x_label: str, y_label: str
-) -> None:
-    """Draw a line plot of a data series."""
-    for name, result in data.items():
-        print(name, result)
-        plt.plot(*zip(*result, strict=True), marker="x", label=name)
-    plt.xlabel(x_label)
-    plt.ylabel(y_label)
-    plt.title("Benchmark analysis")
-    plt.legend()
-    plt.show()
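For reference, a call to the removed helper would have looked something like the sketch below; the series names and data points are invented purely for illustration, only the signature comes from the deleted code.

# Hypothetical usage of the removed line_plot helper (invented data).
line_plot(
    {
        "cpp-hpccg": [(16.0, 1.2), (32.0, 2.5), (64.0, 5.1)],
        "rust-hpccg": [(16.0, 1.4), (32.0, 2.9), (64.0, 5.8)],
    },
    x_label="Mesh z size",
    y_label="Total time (s)",
)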
19 changes: 11 additions & 8 deletions src/hpc_multibench/run_configuration.py
@@ -117,16 +117,19 @@ def run(self) -> int | None:
            return None
        return int(job_id_search.group(1))

-    def collect(self, slurm_id: int) -> str | None:
+    def collect(
+        self, slurm_id: int, check_queue: bool = False  # noqa: FBT001, FBT002
+    ) -> str | None:
        """Collect the output from a completed job with a given slurm id."""
        # Check the job is completed in the queue
-        result = subprocess_run(  # nosec
-            ["squeue", "-j", str(slurm_id)],  # noqa: S603, S607
-            check=True,
-            stdout=PIPE,
-        )
-        if SLURM_UNQUEUED_SUBSTRING in result.stdout.decode("utf-8"):
-            return None
+        if check_queue:
+            result = subprocess_run(  # nosec
+                ["squeue", "-j", str(slurm_id)],  # noqa: S603, S607
+                check=True,
+                stdout=PIPE,
+            )
+            if SLURM_UNQUEUED_SUBSTRING in result.stdout.decode("utf-8"):
+                return None

        # Return the contents of the specified output file
        output_file = self.output_file.parent / self.get_true_output_file_name(slurm_id)
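A hedged usage sketch of the new flag (not code from the repository): polling a submitted job until its output is available. It assumes `run_config` is a realised RunConfiguration and `job_id` came from `run_config.run()`; the collect behaviour for a finished-but-unwritten output file is truncated in this diff, so the loop below is only illustrative.

from time import sleep

def wait_for_output(run_config, job_id: int, poll_seconds: int = 10) -> str:
    """Hypothetical helper: block until the Slurm job leaves the queue.

    With check_queue=True, collect() consults `squeue` and returns None
    while the job is still queued or running.
    """
    output = run_config.collect(job_id, check_queue=True)
    while output is None:
        sleep(poll_seconds)  # wait before asking the scheduler again
        output = run_config.collect(job_id, check_queue=True)
    return output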
79 changes: 56 additions & 23 deletions src/hpc_multibench/test_bench.py
@@ -3,12 +3,16 @@
"""A class representing a test bench composing part of a test plan."""

from argparse import Namespace
from dataclasses import dataclass
from itertools import product
from pathlib import Path
from pickle import dump as pickle_dump # nosec
from pickle import load as pickle_load # nosec
from re import search as re_search
from shutil import rmtree
from typing import TYPE_CHECKING, Any, NamedTuple
from typing import TYPE_CHECKING, Any

import matplotlib.pyplot as plt

from hpc_multibench.yaml_model import BenchModel, RunConfigurationModel

@@ -19,7 +23,8 @@
BASE_OUTPUT_DIRECTORY = Path("results/")


-class RunConfigurationMetadata(NamedTuple):
+@dataclass(frozen=True)
+class RunConfigurationMetadata:
    """Data about run configurations to persist between program instances."""

    job_id: int
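One property worth noting about this swap (my gloss, not the author's): a frozen dataclass remains hashable, so metadata instances can still serve as dictionary keys the way the NamedTuple did. A minimal standalone sketch; the toy class below is not the real RunConfigurationMetadata, which has more fields than this hunk shows.

from dataclasses import dataclass

@dataclass(frozen=True)
class Metadata:
    job_id: int
    name: str

# frozen=True generates __hash__, so instances can key a dict
seen = {Metadata(job_id=42, name="cpp-bench"): "collected"}
print(seen[Metadata(job_id=42, name="cpp-bench")])  # prints "collected"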
@@ -140,41 +145,69 @@ def record(self, args: Namespace) -> None:
        if not args.no_wait:
            raise NotImplementedError("Waiting for queue not yet implemented")

+    def extract_metrics(self, output: str) -> dict[str, str] | None:
+        """
+        Extract the specified metrics from the output file.
+
+        Note that run instantiations can be extracted via regex from output.
+        """
+        metrics: dict[str, str] = {}
+        for metric, regex in self.bench_model.analysis.metrics.items():
+            metric_search = re_search(regex, output)
+            if metric_search is None:
+                return None
+            # TODO: Support multiple groups by lists as keys?
+            metrics[metric] = metric_search.group(1)
+        return metrics
+
    def report(self) -> None:
        """Analyse completed run configurations for the test bench."""
-        # print(f"Reporting data from test bench '{self.name}'")
-        # print(
-        #     f"x: {self.bench_model.analysis.plot.x}, "
-        #     f"y: {self.bench_model.analysis.plot.y}"
-        # )

        if self.run_configurations_metadata is None:
            print(f"Metadata file does not exist for test bench '{self.name}'!")
            return

-        # Print out `data: dict[str, list[tuple[float, float]]]`
-
-        # - Construct realised run configurations from metadata (mapping from metadata to run_config?)
-        # TODO: Could type alias for slurm job id?
-        # TODO: Error handling for name not being in models?
-        reconstructed_run_configurations: dict[
-            RunConfigurationMetadata, RunConfiguration
-        ] = {
-            metadata: self.run_configuration_models[metadata.name].realise(
+        # Reconstruct realised run configurations from the metadata file
+        reconstructed_run_configurations: dict[int, RunConfiguration] = {
+            metadata.job_id: self.run_configuration_models[metadata.name].realise(
                metadata.name, self.output_directory, metadata.instantiation
            )
            for metadata in self.run_configurations_metadata
        }

-        # - Collect results from runs (mapping from metadata to results string?)
-        run_results: dict[RunConfigurationMetadata, str | None] = {
-            metadata: run_configuration.collect(metadata.job_id)
-            for metadata, run_configuration in reconstructed_run_configurations.items()
+        # Collect outputs from the run configurations
+        # TODO: Add async wait for incomplete jobs
+        run_outputs: dict[int, tuple[RunConfiguration, str | None]] = {
+            job_id: (run_configuration, run_configuration.collect(job_id))
+            for job_id, run_configuration in reconstructed_run_configurations.items()
        }

-        # - Reshape results into required formats for line plot
-        for metadata, result in run_results.items():
-            if result is not None:
-                print(f"{metadata.name}, {result[:10]}")
+        # Extract the outputs into the data format needed for the line plot
+        # TODO: Could pull out into analysis file?
+        data: dict[str, list[tuple[float, float]]] = {
+            run_name: [] for run_name in self.run_configuration_models
+        }
+        for run_configuration, output in run_outputs.values():
+            if output is not None:
+                metrics = self.extract_metrics(output)
+                if metrics is None:
+                    continue
+                data[run_configuration.name].append(
+                    (
+                        float(metrics[self.bench_model.analysis.plot.x]),
+                        float(metrics[self.bench_model.analysis.plot.y]),
+                    )
+                )
+
+        for name, results in data.items():
+            print(name, results)
+            plt.plot(*zip(*results, strict=True), marker="x", label=name)
+        plt.xlabel(self.bench_model.analysis.plot.x)
+        plt.ylabel(self.bench_model.analysis.plot.y)
+        plt.title(self.bench_model.analysis.plot.title)
+        plt.legend()
+        plt.show()

        # print("\n".join(str(x) for x in self.run_configurations_metadata))
        # Load mappings from run config/args to slurm job ids
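To illustrate what extract_metrics does, here is a standalone sketch using the metric regexes from the example YAML below; the sample output text is fabricated to match their shape and is not from a real HPCCG run.

from re import search as re_search

# Metric regexes copied from the example YAML; sample text is invented.
metrics_spec = {
    "Mesh z size": r"nz: (\d+)",
    "Total time (s)": r"Time Summary:[\s\S]*Total\s*: ([\d\.]+)[\s\S]*\nFLOPS Summary",
}
sample_output = (
    "nz: 64\n"
    "Time Summary:\n  Total   : 1.23\n"
    "FLOPS Summary:\n  Total   : 9.87\n"
)
# Capture group 1 of each regex, as extract_metrics does per metric
metrics = {
    name: match.group(1)
    for name, regex in metrics_spec.items()
    if (match := re_search(regex, sample_output)) is not None
}
print(metrics)  # {'Mesh z size': '64', 'Total time (s)': '1.23'}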
1 change: 1 addition & 0 deletions src/hpc_multibench/yaml_model.py
@@ -53,6 +53,7 @@ class PlotModel(BaseModel):

    x: str
    y: str
+    title: str = "Performance Plot"

    # TODO: Needs work to expand capability

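Since PlotModel is a pydantic BaseModel, the new field should be optional in bench YAML files; a minimal sketch of the expected behaviour, assuming PlotModel can be imported and constructed by keyword:

from hpc_multibench.yaml_model import PlotModel

plot = PlotModel(x="Mesh z size", y="Total time (s)")
print(plot.title)  # falls back to "Performance Plot" when the YAML omits it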
4 changes: 2 additions & 2 deletions yaml_examples/example.yaml
@@ -29,8 +29,8 @@ benches:
- ["64 64 64", "mpirun -np 16 ./test_HPCCG"]
analysis:
metrics:
"Mesh x size": "nx: (\\d+)"
"Mesh z size": "nz: (\\d+)"
"Total time (s)": "Time Summary:[\\s\\S]*Total\\s*: ([\\d\\.]+)[\\s\\S]*\nFLOPS Summary"
plot:
x: "Mesh x size"
x: "Mesh z size"
y: "Total time (s)"
35 changes: 6 additions & 29 deletions yaml_examples/kudu/strong_weak_scaling.yaml
@@ -46,23 +46,12 @@
# - ["64 64 16", "mpirun -np 64 ./test_HPCCG"]
analysis:
metrics:
"Mesh x size": "nx: (\\d+)"
"Mesh y size": "ny: (\\d+)"
"Mesh z size": "nz: (\\d+)"
"MPI Ranks": "=== RUN INSTANTIATION ===\n\\{.*run_command: mpirun -np (\\d+).*\\}"
"Total time (s)": "Time Summary:[\\s\\S]*Total\\s*: ([\\d\\.]+)[\\s\\S]*\nFLOPS Summary"
"ddot time (s)": "Time Summary:[\\s\\S]*DDOT\\s*: ([\\d\\.]+)[\\s\\S]*\nFLOPS Summary"
"waxpby time (s)": "Time Summary:[\\s\\S]*WAXPBY\\s*: ([\\d\\.]+)[\\s\\S]*\nFLOPS Summary"
"sparsemv time (s)": "Time Summary:[\\s\\S]*SPARSEMV\\s*: ([\\d\\.]+)[\\s\\S]*\nFLOPS Summary"
"Total flops": "FLOPS Summary:[\\s\\S]*Total\\s*: ([\\d\\.]+)[\\s\\S]*\nMFLOPS Summary"
"ddot flops": "FLOPS Summary:[\\s\\S]*DDOT\\s*: ([\\d\\.]+)[\\s\\S]*\nMFLOPS Summary"
"waxpby flops": "FLOPS Summary:[\\s\\S]*WAXPBY\\s*: ([\\d\\.]+)[\\s\\S]*\nMFLOPS Summary"
"sparsemv flops": "FLOPS Summary:[\\s\\S]*SPARSEMV\\s*: ([\\d\\.]+)[\\s\\S]*\nMFLOPS Summary"
"Total mflops": "MFLOPS Summary:[\\s\\S]*Total\\s*: ([\\d\\.]+)"
"ddot mflops": "MFLOPS Summary:[\\s\\S]*DDOT\\s*: ([\\d\\.]+)"
"waxpby mflops": "MFLOPS Summary:[\\s\\S]*WAXPBY\\s*: ([\\d\\.]+)"
"sparsemv mflops": "MFLOPS Summary:[\\s\\S]*SPARSEMV\\s*: ([\\d\\.]+)"
plot:
x: "Mesh x size"
title: "Strong Scaling Plot"
x: "Mesh z size"
y: "Total time (s)"

"weak-scaling":
@@ -82,21 +71,9 @@
# - "mpirun -np 64 ./test_HPCCG"
analysis:
metrics:
"Mesh x size": "nx: (\\d+)"
"Mesh y size": "ny: (\\d+)"
"Mesh z size": "nz: (\\d+)"
"MPI Ranks": "=== RUN INSTANTIATION ===\n\\{.*run_command: mpirun -np (\\d+).*\\}"
"Total time (s)": "Time Summary:[\\s\\S]*Total\\s*: ([\\d\\.]+)[\\s\\S]*\nFLOPS Summary"
"ddot time (s)": "Time Summary:[\\s\\S]*DDOT\\s*: ([\\d\\.]+)[\\s\\S]*\nFLOPS Summary"
"waxpby time (s)": "Time Summary:[\\s\\S]*WAXPBY\\s*: ([\\d\\.]+)[\\s\\S]*\nFLOPS Summary"
"sparsemv time (s)": "Time Summary:[\\s\\S]*SPARSEMV\\s*: ([\\d\\.]+)[\\s\\S]*\nFLOPS Summary"
"Total flops": "FLOPS Summary:[\\s\\S]*Total\\s*: ([\\d\\.]+)[\\s\\S]*\nMFLOPS Summary"
"ddot flops": "FLOPS Summary:[\\s\\S]*DDOT\\s*: ([\\d\\.]+)[\\s\\S]*\nMFLOPS Summary"
"waxpby flops": "FLOPS Summary:[\\s\\S]*WAXPBY\\s*: ([\\d\\.]+)[\\s\\S]*\nMFLOPS Summary"
"sparsemv flops": "FLOPS Summary:[\\s\\S]*SPARSEMV\\s*: ([\\d\\.]+)[\\s\\S]*\nMFLOPS Summary"
"Total mflops": "MFLOPS Summary:[\\s\\S]*Total\\s*: ([\\d\\.]+)"
"ddot mflops": "MFLOPS Summary:[\\s\\S]*DDOT\\s*: ([\\d\\.]+)"
"waxpby mflops": "MFLOPS Summary:[\\s\\S]*WAXPBY\\s*: ([\\d\\.]+)"
"sparsemv mflops": "MFLOPS Summary:[\\s\\S]*SPARSEMV\\s*: ([\\d\\.]+)"
plot:
x: "Mesh x size"
title: "Weak Scaling Plot"
x: "MPI Ranks"
y: "Total time (s)"
