Automate plotting (#30)

* add auto plotting * add auto plotting * cleanup * cleanup
EleutherAI · Feb 19, 2024 · 6218607 · 6218607
1 parent f52e7f3
commit 6218607
Show file tree

Hide file tree

Showing 4 changed files with 81 additions and 2 deletions.
diff --git a/benchmarks/sizing/README.md b/benchmarks/sizing/README.md
@@ -11,6 +11,11 @@ First, install the required packages:
 pip install -r requirements.txt
 ```
 
+## Plotting
+
+Once you have run the benchmarks, you can [plot the results](plotting).
+
+## Benchmarks
 
 There are three scripts within `benchmarks/sizing` that can be run:
 
@@ -145,4 +150,3 @@ Example:
 ```
 python convert_to_csv.py --file_name ../results/bmm.out --output_file ../results/bmm.csv
 ```
-
diff --git a/benchmarks/sizing/plotting/README.md b/benchmarks/sizing/plotting/README.md
@@ -0,0 +1,14 @@
+# Plotting
+
+Various ways to plot benchmark results produced by [these tools](..).
+
+## Automated plotting
+
+This script can plot `mm_flops.py` and `bmm_flops.py` results automatically, saving the figure next to the results file with a `.png` extension:
+```
+python plotting/bplot.py --results_file mm_m_range_0_20k_16_n2k_k2k-env-vars.txt --notes "MI300X F.linear made by mm_flops.py"
+```
+
+## Tweak the notebook
+
+[transformer_figures.ipynb](transformer_figures.ipynb)
diff --git a/benchmarks/sizing/plotting/bplot.py b/benchmarks/sizing/plotting/bplot.py
@@ -0,0 +1,60 @@
+import argparse
+import matplotlib.pyplot as plt
+import numpy as np
+import pandas as pd
+from pathlib import Path
+import textwrap
+
+from convert_to_csv import to_pandas
+
+if __name__ == '__main__':
+    parser = argparse.ArgumentParser()
+    parser.add_argument("--results_file", type=str, help=f"results file generated by benchmarks here")
+    parser.add_argument("--notes", type=str, default="", help=f"use to annotate the plot")
+    args = parser.parse_args()
+
+    results_file = Path(args.results_file)
+    if not results_file.exists():
+        raise ValueError(f"can't find {results_file}")
+
+    img_file = results_file.with_suffix('.png')
+
+    df = to_pandas(results_file)
+
+    throughput_col = "throughput" # assumption for now
+
+    # sort out fixed dimensions from the range ones
+    fixed_dim = []
+    range_cols = []
+    for col in df:
+        unique_vals = df[col].unique()
+        if len(unique_vals) == 1:
+            fixed_dim.append(f"{col}={unique_vals[0]}")
+        else:
+            range_cols.append(col)
+
+    range_cols = list(set(range_cols) - set([throughput_col]))
+    # XXX: at the moment assuming that only one dimension is a range, the other are fixed
+    if len(range_cols) != 1:
+        raise ValueError("Currently supporting plotting for benchmarks with one dimension using range")
+
+    # these go on the xlabel along with the variable dimension
+    dim_notes = ", ".join(fixed_dim)
+
+    plt.figure(dpi=500)
+    plt.plot(df[range_cols[0]], df[throughput_col])
+    plt.xlabel(f"{range_cols[0]} ({dim_notes})")
+    plt.ylabel("Throughput \n (TFLOP/s)")
+    plt.title("Throughput of GEMMs of Various Sizes")
+
+    # wrap notes - this can now handle several lines of text.
+    notes = "\n".join(textwrap.wrap(args.notes, width=60))
+
+    plt.annotate(notes,
+                 xy=(0.001, -0.3),
+                 xycoords='axes fraction',
+                 ha='left',
+                 va="center",
+                 fontsize=12)
+
+    plt.savefig(img_file, bbox_inches='tight')
diff --git a/benchmarks/sizing/requirements.txt b/benchmarks/sizing/requirements.txt
@@ -1,6 +1,7 @@
-
 deepspeed
+matplotlib
 numpy
+pandas
 pyyaml
 sentencepiece
 tokenizers