From 621860744633dc138c5d01be582e7f92c2f9ef05 Mon Sep 17 00:00:00 2001 From: Stas Bekman Date: Mon, 19 Feb 2024 15:13:57 -0800 Subject: [PATCH] Automate plotting (#30) * add auto plotting * add auto plotting * cleanup * cleanup --- benchmarks/sizing/README.md | 6 ++- benchmarks/sizing/plotting/README.md | 14 +++++++ benchmarks/sizing/plotting/bplot.py | 60 ++++++++++++++++++++++++++++ benchmarks/sizing/requirements.txt | 3 +- 4 files changed, 81 insertions(+), 2 deletions(-) create mode 100644 benchmarks/sizing/plotting/README.md create mode 100644 benchmarks/sizing/plotting/bplot.py diff --git a/benchmarks/sizing/README.md b/benchmarks/sizing/README.md index 7fbfc06..0cc4e22 100644 --- a/benchmarks/sizing/README.md +++ b/benchmarks/sizing/README.md @@ -11,6 +11,11 @@ First, install the required packages: pip install -r requirements.txt ``` +## Plotting + +Once you run the benchmark you can [plot the results](plotting). + +## Benchmarks There are three scripts within `benchmarks/sizing` that can be run: @@ -145,4 +150,3 @@ Example: ``` python convert_to_csv.py --file_name ../results/bmm.out --output_file ../results/bmm.csv ``` - diff --git a/benchmarks/sizing/plotting/README.md b/benchmarks/sizing/plotting/README.md new file mode 100644 index 0000000..52e3542 --- /dev/null +++ b/benchmarks/sizing/plotting/README.md @@ -0,0 +1,14 @@ +# Plotting + +Various ways to plot benchmark results produced by [these tools](..). 
"""Plot benchmark throughput against the single varying GEMM dimension.

Usage:
    python plotting/bplot.py --results_file <results.txt> --notes "free text"

The results file is parsed by the project-local ``convert_to_csv.to_pandas``
and the plot is written next to the results file with a ``.png`` suffix.
"""
import argparse
import textwrap
from pathlib import Path

import numpy as np
import pandas as pd

THROUGHPUT_COL = "throughput"  # assumption for now


def split_dimensions(df, throughput_col=THROUGHPUT_COL):
    """Separate the fixed columns of *df* from the ones that vary.

    Args:
        df: table of benchmark results; iterating it must yield column names
            and ``df[col].unique()`` must return that column's distinct values.
        throughput_col: name of the measured column. It is not a dimension,
            so it is left out of both returned lists.

    Returns:
        A ``(fixed_dims, range_cols)`` tuple. ``fixed_dims`` holds one
        ``"name=value"`` label per column with exactly one distinct value;
        ``range_cols`` holds the names of all remaining columns. Both keep
        the column order of *df*.
    """
    fixed_dims = []
    range_cols = []
    for col in df:
        if col == throughput_col:
            continue
        unique_vals = df[col].unique()
        if len(unique_vals) == 1:
            fixed_dims.append(f"{col}={unique_vals[0]}")
        else:
            range_cols.append(col)
    return fixed_dims, range_cols


def plot_throughput(df, img_file, notes="", throughput_col=THROUGHPUT_COL):
    """Render throughput versus the one varying dimension and save it.

    Args:
        df: table of benchmark results (see ``split_dimensions``).
        img_file: path the figure is saved to.
        notes: free text placed below the axes, wrapped at 60 characters.
        throughput_col: name of the column plotted on the y axis.

    Raises:
        ValueError: if *throughput_col* is missing, or if the number of
            varying dimensions is not exactly one.
    """
    if throughput_col not in df:
        raise ValueError(f"results have no '{throughput_col}' column")

    fixed_dims, range_cols = split_dimensions(df, throughput_col)
    # XXX: at the moment assuming that only one dimension is a range, the others are fixed
    if len(range_cols) != 1:
        raise ValueError("Currently supporting plotting for benchmarks with one dimension using range")
    x_col = range_cols[0]

    # Deferred so that input validation above does not need the plotting stack.
    import matplotlib.pyplot as plt

    # these go on the xlabel along with the variable dimension
    dim_notes = ", ".join(fixed_dims)

    plt.figure(dpi=500)
    plt.plot(df[x_col], df[throughput_col])
    plt.xlabel(f"{x_col} ({dim_notes})")
    plt.ylabel("Throughput \n (TFLOP/s)")
    plt.title("Throughput of GEMMs of Various Sizes")

    # wrap notes - this can now handle several lines of text.
    wrapped_notes = "\n".join(textwrap.wrap(notes, width=60))

    plt.annotate(wrapped_notes,
                 xy=(0.001, -0.3),
                 xycoords='axes fraction',
                 ha='left',
                 va="center",
                 fontsize=12)

    plt.savefig(img_file, bbox_inches='tight')
    # Release the figure so repeated calls in one process do not accumulate them.
    plt.close()


def main(argv=None):
    """Parse the command line, load the results file and write the plot.

    Args:
        argv: argument list to parse; ``None`` makes argparse use ``sys.argv``.

    Raises:
        ValueError: if the results file does not exist, or if the parsed
            results cannot be plotted (see ``plot_throughput``).
    """
    parser = argparse.ArgumentParser()
    # required: without it Path(None) would fail with an opaque TypeError
    parser.add_argument("--results_file", type=str, required=True,
                        help="results file generated by benchmarks here")
    parser.add_argument("--notes", type=str, default="", help="use to annotate the plot")
    args = parser.parse_args(argv)

    results_file = Path(args.results_file)
    if not results_file.exists():
        raise ValueError(f"can't find {results_file}")

    img_file = results_file.with_suffix('.png')

    # Project-local parser, imported late so `--help` and argument errors do
    # not depend on it.
    # NOTE(review): presumably returns a pandas DataFrame with one column per
    # GEMM dimension plus a throughput column - confirm against convert_to_csv.
    from convert_to_csv import to_pandas

    df = to_pandas(results_file)
    plot_throughput(df, img_file, notes=args.notes)


if __name__ == '__main__':
    main()