ai-dynamo · saturley-hall · Oct 16, 2025 · Oct 15, 2025 · Oct 16, 2025
diff --git a/components/backends/sglang/slurm_jobs/scripts/gap/bench.sh b/components/backends/sglang/slurm_jobs/scripts/gap/bench.sh
@@ -30,14 +30,14 @@ set -e
 warmup_model $head_node $head_port $SERVED_MODEL_NAME $MODEL_PATH "${chosen_isl}x${chosen_osl}x10000x10000x250"
 set +e
 
-genai_perf_warmup_workers=$(python3 -c "print(max(${DP:-0}, ${prefill_workers:-0}, ${decode_workers:-0}))")
+aiperf_warmup_workers=$(python3 -c "print(max(${DP:-0}, ${prefill_workers:-0}, ${decode_workers:-0}))")
 
 IFS='x' read -r -a concurrency_list <<< "$chosen_concurrencies"
 
 profile_folder="/logs/gap_isl_${chosen_isl}_osl_${chosen_osl}"
 mkdir -p $profile_folder
 
-tmp_work_dir=$(mktemp -d -t genai-perf-XXXXXXXX)
+tmp_work_dir=$(mktemp -d -t aiperf-XXXXXXXX)
 for concurrency in ${concurrency_list[@]}; do
     export_folder="${tmp_work_dir}/concurrency_${concurrency}"
     mkdir -p $export_folder
@@ -46,7 +46,7 @@ for concurrency in ${concurrency_list[@]}; do
 
     echo "Run benchmark for concurrency $concurrency; ISL $chosen_isl; OSL $chosen_osl"
     command=(
-        genai-perf profile
+        aiperf profile
         -m ${SERVED_MODEL_NAME}
         --tokenizer ${MODEL_PATH}
         --endpoint-type chat
@@ -55,7 +55,7 @@ for concurrency in ${concurrency_list[@]}; do
         --streaming
 
         --concurrency ${concurrency}
-        --warmup-request-count $(( 2*genai_perf_warmup_workers ))
+        --warmup-request-count $(( 2*aiperf_warmup_workers ))
         --request-count $(( 5*concurrency ))
 
         --synthetic-input-tokens-mean ${chosen_isl} --synthetic-input-tokens-stddev 0
@@ -69,13 +69,11 @@ for concurrency in ${concurrency_list[@]}; do
 
         --tokenizer-trust-remote-code
         --num-dataset-entries 3000
-        --
-        --max-threads ${concurrency}
     )
 
     set -e
     ${command[@]}
     set +e
 
-    cp $export_folder/*/*_genai_perf.json $profile_folder
+    cp $export_folder/*/*_aiperf.json $profile_folder
 done
diff --git a/components/backends/trtllm/deploy/README.md b/components/backends/trtllm/deploy/README.md
@@ -271,7 +271,7 @@ args:
 
 ## Benchmarking
 
-To benchmark your deployment with GenAI-Perf, see this utility script: [perf.sh](../../../../benchmarks/llm/perf.sh)
+To benchmark your deployment with AIPerf, see this utility script: [perf.sh](../../../../benchmarks/llm/perf.sh)
 
 Configure the `model` name and `host` based on your deployment.
 

diff --git a/components/backends/trtllm/performance_sweeps/README.md b/components/backends/trtllm/performance_sweeps/README.md
@@ -38,7 +38,7 @@ Please note that:
 
 1. `submit_disagg.sh` - Main entry point for submitting benchmark jobs for disaggregated configurations. This includes WideEP optimization for DEP>=16.
 2. `submit_agg.sh` - Main entry point for submitting benchmark jobs for aggregated configurations.
-3. `post_process.py` - Scan the genai-perf results to produce a json with entries to each config point.
+3. `post_process.py` - Scans the aiperf results to produce a JSON file with an entry for each config point.
 4. `plot_performance_comparison.py` - Takes the json result file for disaggregated and/or aggregated configuration sweeps and plots a pareto line for better visualization.
 
 For more finer grained details on how to launch TRTLLM backend workers with DeepSeek R1 on GB200 slurm, please refer [multinode-examples.md](../../../../docs/backends/trtllm/multinode/multinode-examples.md). This guide shares similar assumption to the multinode examples guide.
@@ -117,9 +117,9 @@ export SERVED_MODEL_NAME="nvidia/DeepSeek-R1-FP4"
 
 ## Post-Processing Results
 
-The above jobs use genAI-perf tool to benchmark each configuration point across different concurrency values. These get stored in `dynamo_disagg-bm-8150-1024/<config-setup>/genai_perf_artifacts` and `dynamo_agg-bm-8150-1024/<config-setup>/genai_perf_artifacts` for disaggregated and aggregated respectively.
+The above jobs use the aiperf tool to benchmark each configuration point across different concurrency values. The results are stored in `dynamo_disagg-bm-8150-1024/<config-setup>/aiperf_artifacts` and `dynamo_agg-bm-8150-1024/<config-setup>/aiperf_artifacts` for disaggregated and aggregated configurations, respectively.
 
-After your benchmarking jobs have completed, you can use the `post_process.py` script to aggregate and summarize the results from the generated genai_perf_artifacts.
+After your benchmarking jobs have completed, you can use the `post_process.py` script to aggregate and summarize the results from the generated aiperf_artifacts.
 
 To run the post-processing script, use:
 
@@ -149,6 +149,6 @@ Refer to [Beyond the Buzz: A Pragmatic Take on Inference Disaggregation](https:/
 
 ## Known Issues
 
-- Some jobs may time out if genai-perf requires more time to complete all concurrency levels.
+- Some jobs may time out if aiperf requires more time to complete all concurrency levels.
 - Workers may encounter out-of-memory (OOM) errors during inference, especially with larger configurations.
 - Configurations affected by these issues will result in missing data points on the performance plot.
diff --git a/components/backends/trtllm/performance_sweeps/benchmark_agg.slurm b/components/backends/trtllm/performance_sweeps/benchmark_agg.slurm
@@ -40,7 +40,7 @@ if [ "${enable_attention_dp}" = "false" ]; then
 fi
 
 full_logdir=${sub_dir}
-artifacts_dir=${full_logdir}/genai_perf_artifacts
+artifacts_dir=${full_logdir}/aiperf_artifacts
 mkdir -p ${artifacts_dir}
 
 

@@ -124,7 +124,7 @@ def extract_throughput_data(csv_path: str) -> Tuple[Optional[float], Optional[fl
     Extract throughput data from CSV file
 
     Args:
-        csv_path: Path to profile_export_genai_perf.csv
+        csv_path: Path to profile_export_aiperf.csv
 
     Returns:
         Tuple of (output_token_throughput, output_token_throughput_per_user)
@@ -184,10 +184,10 @@ def process_directory(dir_path: str) -> Optional[List[Dict[str, Any]]]:
         Dictionary containing extracted data, or None if processing failed
     """
     dir_path_obj = Path(dir_path)
-    artifacts_path = dir_path_obj / "genai_perf_artifacts"
+    artifacts_path = dir_path_obj / "aiperf_artifacts"
 
     if not artifacts_path.exists():
-        print(f"Warning: No genai_perf_artifacts directory found in {dir_path}")
+        print(f"Warning: No aiperf_artifacts directory found in {dir_path}")
         return None
 
     # Parse deployment configuration
@@ -205,7 +205,7 @@ def process_directory(dir_path: str) -> Optional[List[Dict[str, Any]]]:
     csv_files = []
     for item in artifacts_path.iterdir():
         if item.is_dir():
-            csv_path = item / "profile_export_genai_perf.csv"
+            csv_path = item / "profile_export_aiperf.csv"
             if csv_path.exists():
                 csv_files.append(str(csv_path))
 

diff --git a/components/backends/trtllm/performance_sweeps/scripts/bench.sh b/components/backends/trtllm/performance_sweeps/scripts/bench.sh
@@ -54,8 +54,8 @@ set -x
 config_file=${log_path}/config.yaml
 
 
-# install genai-perf
-pip install genai-perf
+# install aiperf
+pip install aiperf
 
 # Create artifacts root directory if it doesn't exist
 if [ ! -d "${artifacts_dir}" ]; then
@@ -153,7 +153,7 @@ for concurrency in ${concurrency_list}; do
     num_prompts=$((concurrency * multi_round))
     echo "Benchmarking with concurrency ${concurrency} ... ${num_prompts} prompts"
     mkdir -p ${log_path}/concurrency_${concurrency}
-    genai-perf profile \
+    aiperf profile \
     	--model ${model} \
     	--tokenizer ${model_path} \
     	--endpoint-type chat \
@@ -174,9 +174,7 @@ for concurrency in ${concurrency_list}; do
 	    --num-dataset-entries ${num_prompts} \
     	--random-seed 100 \
     	--artifact-dir ${artifacts_dir} \
-    	-- \
     	-v \
-    	--max-threads ${concurrency} \
     	-H 'Authorization: Bearer NOT USED' \
     	-H 'Accept: text/event-stream'
     echo "Benchmark with concurrency ${concurrency} done"

diff --git a/docs/backends/trtllm/README.md b/docs/backends/trtllm/README.md
@@ -196,7 +196,7 @@ NOTE: To send a request to a multi-node deployment, target the node which is run
 
 ### Benchmarking
 
-To benchmark your deployment with GenAI-Perf, see this utility script, configuring the
+To benchmark your deployment with AIPerf, see this utility script, configuring the
 `model` name and `host` based on your deployment: [perf.sh](../../../benchmarks/llm/perf.sh)
 
 
@@ -236,7 +236,7 @@ NOTE: To send a request to a multi-node deployment, target the node which is run
 
 ## Benchmarking
 
-To benchmark your deployment with GenAI-Perf, see this utility script, configuring the
+To benchmark your deployment with AIPerf, see this utility script, configuring the
 `model` name and `host` based on your deployment: [perf.sh](../../../benchmarks/llm/perf.sh)
 
 ## Multimodal support

diff --git a/docs/backends/trtllm/gpt-oss.md b/docs/backends/trtllm/gpt-oss.md
@@ -404,7 +404,7 @@ curl localhost:8000/v1/chat/completions   -H "Content-Type: application/json"
 
 ### Performance Testing with AIPerf
 
-The Dynamo container includes [AIPerf](https://github.com/ai-dynamo/aiperf/tree/main?tab=readme-ov-file#aiperf), NVIDIA's tool for benchmarking generative AI models. This tool helps measure throughput, latency, and other performance metrics for your deployment.
+The Dynamo container includes [AIPerf](https://github.com/ai-dynamo/aiperf/tree/main?tab=readme-ov-file#aiperf), NVIDIA's tool for benchmarking generative AI models. This tool helps measure throughput, latency, and other performance metrics for your deployment.
 
 **Run the following benchmark from inside the container** (after completing the deployment steps above):