ai-dynamo · saturley-hall · Oct 14, 2025 · Oct 14, 2025
diff --git a/README.md b/README.md
@@ -178,7 +178,7 @@ Rerun with `curl -N` and change `stream` in the request to `true` to get the res
 
 Dynamo provides comprehensive benchmarking tools to evaluate and optimize your deployments:
 
-- **[Benchmarking Guide](docs/benchmarks/benchmarking.md)** – Compare deployment topologies (aggregated vs. disaggregated vs. vanilla vLLM) using GenAI-Perf
+- **[Benchmarking Guide](docs/benchmarks/benchmarking.md)** – Compare deployment topologies (aggregated vs. disaggregated vs. vanilla vLLM) using AIPerf
 - **[Pre-Deployment Profiling](docs/benchmarks/pre_deployment_profiling.md)** – Optimize configurations before deployment to meet SLA requirements
 
 # Engines

diff --git a/benchmarks/README.md b/benchmarks/README.md
@@ -15,7 +15,7 @@
 
 # Benchmarks
 
-This directory contains benchmarking scripts and tools for performance evaluation of Dynamo deployments. The benchmarking framework is a wrapper around genai-perf that makes it easy to benchmark DynamoGraphDeployments or other deployments with exposed endpoints.
+This directory contains benchmarking scripts and tools for performance evaluation of Dynamo deployments. The benchmarking framework is a wrapper around aiperf that makes it easy to benchmark DynamoGraphDeployments or other deployments with exposed endpoints.
 
 ## Quick Start
 

diff --git a/benchmarks/llm/perf.sh b/benchmarks/llm/perf.sh
@@ -202,7 +202,7 @@ if [ $index -gt 0 ]; then
     echo "--------------------------------"
 fi
 
-echo "Running genai-perf with:"
+echo "Running aiperf with:"
 echo "Model: $model"
 echo "ISL: $isl"
 echo "OSL: $osl"
@@ -214,7 +214,7 @@ for concurrency in "${concurrency_array[@]}"; do
 
   # NOTE: For Dynamo HTTP OpenAI frontend, use `nvext` for fields like
   # `ignore_eos` since they are not in the official OpenAI spec.
-  genai-perf profile \
+  aiperf profile \
     --model ${model} \
     --tokenizer ${model} \
     --endpoint-type chat \

@@ -26,23 +26,21 @@
 
 
 def get_json_paths(search_paths):
-    genai_perf_profile_export_json_paths = []
+    aiperf_profile_export_json_paths = []
     deployment_config_json_paths = []
     for search_path in search_paths:
         deployment_config_json_path = os.path.join(
             search_path, "deployment_config.json"
         )
         if not os.path.exists(deployment_config_json_path):
             raise Exception(f"deployment_config.json not found in {search_path}")
-        for root, dirs, files in os.walk(search_path):
+        for root, _, files in os.walk(search_path):
             for file in files:
-                if file == "profile_export_genai_perf.json":
-                    genai_perf_profile_export_json_paths.append(
-                        os.path.join(root, file)
-                    )
+                if file == "profile_export_aiperf.json":
+                    aiperf_profile_export_json_paths.append(os.path.join(root, file))
                     deployment_config_json_paths.append(deployment_config_json_path)
 
-    return genai_perf_profile_export_json_paths, deployment_config_json_paths
+    return aiperf_profile_export_json_paths, deployment_config_json_paths
 
 
 # search for -concurrency<number> in the name
@@ -81,13 +79,13 @@ def parse_kind_and_mode(deployment_config_json_path):
 
 
 def extract_val_and_concurrency(
-    genai_perf_profile_export_json_paths, deployment_config_json_paths, stat_value="avg"
+    aiperf_profile_export_json_paths, deployment_config_json_paths, stat_value="avg"
 ):
     results = []
-    for genai_perf_profile_export_json_path, deployment_config_json_path in zip(
-        genai_perf_profile_export_json_paths, deployment_config_json_paths
+    for aiperf_profile_export_json_path, deployment_config_json_path in zip(
+        aiperf_profile_export_json_paths, deployment_config_json_paths
     ):
-        with open(genai_perf_profile_export_json_path, "r") as f:
+        with open(aiperf_profile_export_json_path, "r") as f:
             data = json.load(f)
             # output_token_throughput contains only avg
             output_token_throughput = data.get("output_token_throughput", {}).get("avg")
@@ -99,7 +97,7 @@ def extract_val_and_concurrency(
             # request_throughput contains only avg
             request_throughput = data.get("request_throughput", {}).get("avg")
 
-        concurrency = parse_concurrency(genai_perf_profile_export_json_path)
+        concurrency = parse_concurrency(aiperf_profile_export_json_path)
         num_gpus = parse_gpus(deployment_config_json_path)
         kind, mode = parse_kind_and_mode(deployment_config_json_path)
 
@@ -116,7 +114,7 @@ def extract_val_and_concurrency(
 
         results.append(
             {
-                "configuration": genai_perf_profile_export_json_path,
+                "configuration": aiperf_profile_export_json_path,
                 "kind": kind,
                 "mode": mode,
                 "num_gpus": num_gpus,
@@ -241,12 +239,12 @@ def pareto_efficient(ids, points):
     import os
 
     parser = argparse.ArgumentParser(
-        description="Plot Pareto graph from GenAI-Perf artifacts"
+        description="Plot Pareto graph from AIPerf artifacts"
     )
     parser.add_argument(
         "--artifacts-root-dir",
         required=True,
-        help="Root directory containing artifact directories to search for profile_export_genai_perf.json files",
+        help="Root directory containing artifact directories to search for profile_export_aiperf.json files",
     )
     parser.add_argument(
         "--title",
@@ -260,16 +258,16 @@ def pareto_efficient(ids, points):
     if not artifacts_dirs:
         raise ValueError(f"No artifacts directories found in {args.artifacts_root_dir}")
 
-    genai_perf_profile_export_json_paths, deployment_config_json_paths = get_json_paths(
+    aiperf_profile_export_json_paths, deployment_config_json_paths = get_json_paths(
         artifacts_dirs
     )
 
-    if len(genai_perf_profile_export_json_paths) != len(deployment_config_json_paths):
+    if len(aiperf_profile_export_json_paths) != len(deployment_config_json_paths):
         raise ValueError(
-            f"Number of genai_perf_profile_export_json_paths ({len(genai_perf_profile_export_json_paths)}) does not match number of deployment_config_json_paths ({len(deployment_config_json_paths)})"
+            f"Number of aiperf_profile_export_json_paths ({len(aiperf_profile_export_json_paths)}) does not match number of deployment_config_json_paths ({len(deployment_config_json_paths)})"
         )
 
     extracted_values = extract_val_and_concurrency(
-        genai_perf_profile_export_json_paths, deployment_config_json_paths
+        aiperf_profile_export_json_paths, deployment_config_json_paths
     )
     create_pareto_graph(extracted_values, title=args.title)
@@ -5,8 +5,8 @@
 import logging
 import os
 
-from utils.profile_decode import profile_decode
-from utils.profile_prefill import profile_prefill
+from benchmarks.profiler.utils.profile_decode import profile_decode
+from benchmarks.profiler.utils.profile_prefill import profile_prefill
 
 logger = logging.getLogger(__name__)
 logger.setLevel(logging.INFO)

@@ -22,13 +22,13 @@
 import numpy as np
 import yaml
 
+from benchmarks.profiler.utils.aiperf import benchmark_decode, benchmark_prefill
 from benchmarks.profiler.utils.config import (
     CONFIG_MODIFIERS,
     WORKER_COMPONENT_NAMES,
     generate_dgd_config_with_planner,
 )
 from benchmarks.profiler.utils.estimate_perf import AIConfiguratorPerfEstimator
-from benchmarks.profiler.utils.genai_perf import benchmark_decode, benchmark_prefill
 from benchmarks.profiler.utils.planner_utils import add_planner_arguments_to_parser
 from benchmarks.profiler.utils.plot import (
     plot_decode_performance,
@@ -245,18 +245,18 @@ async def run_profile(args):
                     f"Logs have been saved to {client.base_log_dir / client.deployment_name}"
                 )
 
-                # run genai-perf
+                # run aiperf
                 base_url = client.get_service_url()
-                genai_perf_artifact_dir = f"{work_dir}/gap_isl{args.isl}"
-                gap_result = benchmark_prefill(
+                ai_perf_artifact_dir = f"{work_dir}/aiperf_isl{args.isl}"
+                aiperf_result = benchmark_prefill(
                     args.isl,
-                    genai_perf_artifact_dir,
+                    ai_perf_artifact_dir,
                     model_name,
                     model_name,
                     base_url=base_url,
                 )
-                if gap_result is not None:
-                    ttft = gap_result["time_to_first_token"]["avg"]
+                if aiperf_result is not None:
+                    ttft = aiperf_result["records"]["ttft"]["avg"]
 
                 logger.info("Cleaning up deployment...")
                 await client.delete_deployment()
@@ -424,20 +424,23 @@ async def run_profile(args):
                         )
                     else:
                         base_url = client.get_service_url()
-                        genai_perf_artifact_dir = f"{work_dir}/gap_request{num_request}_isl{args.isl}_osl{args.osl}_n{num_request}"
-                        gap_result = benchmark_decode(
+                        ai_perf_artifact_dir = f"{work_dir}/aiperf_request{num_request}_isl{args.isl}_osl{args.osl}_n{num_request}"
+                        aiperf_result = benchmark_decode(
                             args.isl,
                             args.osl,
                             num_request,
-                            genai_perf_artifact_dir,
+                            ai_perf_artifact_dir,
                             model_name,
                             model_name,
                             base_url=base_url,
                         )
-                        if gap_result is not None:
-                            itl = gap_result["inter_token_latency"]["avg"]
+                        if aiperf_result is not None:
+                            itl = aiperf_result["records"]["inter_token_latency"]["avg"]
                             thpt_per_gpu = (
-                                gap_result["output_token_throughput"]["avg"] / num_gpus
+                                aiperf_result["records"]["output_token_throughput"][
+                                    "avg"
+                                ]
+                                / num_gpus
                             )
 
                     if itl is not None and thpt_per_gpu is not None: