From 621860744633dc138c5d01be582e7f92c2f9ef05 Mon Sep 17 00:00:00 2001
From: Stas Bekman <stas00@users.noreply.github.com>
Date: Mon, 19 Feb 2024 15:13:57 -0800
Subject: [PATCH] Automate plotting (#30)

* add auto plotting

* add auto plotting

* cleanup

* cleanup
---
 benchmarks/sizing/README.md          |  6 ++-
 benchmarks/sizing/plotting/README.md | 14 +++++++
 benchmarks/sizing/plotting/bplot.py  | 60 ++++++++++++++++++++++++++++
 benchmarks/sizing/requirements.txt   |  3 +-
 4 files changed, 81 insertions(+), 2 deletions(-)
 create mode 100644 benchmarks/sizing/plotting/README.md
 create mode 100644 benchmarks/sizing/plotting/bplot.py

diff --git a/benchmarks/sizing/README.md b/benchmarks/sizing/README.md
index 7fbfc06..0cc4e22 100644
--- a/benchmarks/sizing/README.md
+++ b/benchmarks/sizing/README.md
@@ -11,6 +11,11 @@ First, install the required packages:
 pip install -r requirements.txt
 ```
 
+## Plotting
+
+Once you have run a benchmark, you can [plot the results](plotting).
+
+## Benchmarks
 
 There are three scripts within `benchmarks/sizing` that can be run:
 
@@ -145,4 +150,3 @@ Example:
 ```
 python convert_to_csv.py --file_name ../results/bmm.out --output_file ../results/bmm.csv
 ```
-
diff --git a/benchmarks/sizing/plotting/README.md b/benchmarks/sizing/plotting/README.md
new file mode 100644
index 0000000..52e3542
--- /dev/null
+++ b/benchmarks/sizing/plotting/README.md
@@ -0,0 +1,14 @@
+# Plotting
+
+Various ways to plot benchmark results produced by [these tools](..).
+
+## Automated plotting
+
+This script can plot `mm_flops.py` and `bmm_flops.py` results automatically:
+```
+python plotting/bplot.py --results_file mm_m_range_0_20k_16_n2k_k2k-env-vars.txt --notes "MI300X F.linear made by mm_flops.py"
+```
+
+## Tweak the notebook
+
+[transformer_figures.ipynb](transformer_figures.ipynb)
diff --git a/benchmarks/sizing/plotting/bplot.py b/benchmarks/sizing/plotting/bplot.py
new file mode 100644
index 0000000..7ec73e7
--- /dev/null
+++ b/benchmarks/sizing/plotting/bplot.py
@@ -0,0 +1,60 @@
+import argparse
+import matplotlib.pyplot as plt
+import numpy as np
+import pandas as pd
+from pathlib import Path
+import textwrap
+
+from convert_to_csv import to_pandas
+
+if __name__ == '__main__':
+    # Plot throughput against the one varying dimension of a benchmark results file.
+    parser = argparse.ArgumentParser()
+    # required: otherwise a missing flag reaches Path(None) and dies with a TypeError
+    parser.add_argument("--results_file", type=str, required=True, help="results file generated by benchmarks here")
+    parser.add_argument("--notes", type=str, default="", help="use to annotate the plot")
+    args = parser.parse_args()
+
+    results_file = Path(args.results_file)
+    if not results_file.exists():
+        raise ValueError(f"can't find {results_file}")
+
+    # the image lands next to the results file: same name, .png suffix
+    img_file = results_file.with_suffix('.png')
+    df = to_pandas(results_file)
+
+    throughput_col = "throughput" # assumption for now
+
+    # sort out fixed dimensions from the range ones
+    fixed_dim = []
+    range_cols = []
+    for col in df:
+        if col == throughput_col:
+            continue  # the measured value is plotted on y, it is not a dimension
+        unique_vals = df[col].unique()
+        if len(unique_vals) == 1:
+            fixed_dim.append(f"{col}={unique_vals[0]}")
+        else:
+            range_cols.append(col)
+
+    # XXX: at the moment assuming that only one dimension is a range, the other are fixed
+    if len(range_cols) != 1:
+        raise ValueError("Currently supporting plotting for benchmarks with one dimension using range")
+
+    # these go on the xlabel along with the variable dimension
+    dim_notes = ", ".join(fixed_dim)
+
+    plt.figure(dpi=500)
+    plt.plot(df[range_cols[0]], df[throughput_col])
+    plt.xlabel(f"{range_cols[0]} ({dim_notes})")
+    plt.ylabel("Throughput \n (TFLOP/s)")
+    plt.title("Throughput of GEMMs of Various Sizes")
+
+    # wrap notes - this can now handle several lines of text.
+    notes = "\n".join(textwrap.wrap(args.notes, width=60))
+
+    # xy is in axes-fraction coordinates; the negative y puts the notes below the x axis
+    plt.annotate(notes, xy=(0.001, -0.3), xycoords='axes fraction',
+                 ha='left', va="center", fontsize=12)
+
+    plt.savefig(img_file, bbox_inches='tight')
diff --git a/benchmarks/sizing/requirements.txt b/benchmarks/sizing/requirements.txt
index c03fdd4..daed9b1 100644
--- a/benchmarks/sizing/requirements.txt
+++ b/benchmarks/sizing/requirements.txt
@@ -1,6 +1,7 @@
-
 deepspeed
+matplotlib
 numpy
+pandas
 pyyaml
 sentencepiece
 tokenizers