diff --git a/benchmarks/llm/perf.sh b/benchmarks/llm/perf.sh
index 446ec1f74d0c..a1f66dc01710 100644
--- a/benchmarks/llm/perf.sh
+++ b/benchmarks/llm/perf.sh
@@ -235,9 +235,8 @@ for concurrency in "${concurrency_array[@]}"; do
     --num-dataset-entries $(($concurrency*12)) \
     --random-seed 100 \
     --artifact-dir ${artifact_dir} \
-    -- \
+    --ui simple \
     -v \
-    --max-threads ${concurrency} \
     -H 'Authorization: Bearer NOT USED' \
     -H 'Accept: text/event-stream'
 
diff --git a/benchmarks/profiler/profile_sla.py b/benchmarks/profiler/profile_sla.py
index 8c670986cffa..757c073b2851 100644
--- a/benchmarks/profiler/profile_sla.py
+++ b/benchmarks/profiler/profile_sla.py
@@ -256,7 +256,7 @@ async def run_profile(args):
                     base_url=base_url,
                 )
                 if aiperf_result is not None:
-                    ttft = aiperf_result["records"]["ttft"]["avg"]
+                    ttft = aiperf_result["time_to_first_token"]["avg"]
 
                 logger.info("Cleaning up deployment...")
                 await client.delete_deployment()
@@ -435,11 +435,9 @@ async def run_profile(args):
                             base_url=base_url,
                         )
                         if aiperf_result is not None:
-                            itl = aiperf_result["records"]["inter_token_latency"]["avg"]
+                            itl = aiperf_result["inter_token_latency"]["avg"]
                             thpt_per_gpu = (
-                                aiperf_result["records"]["output_token_throughput"][
-                                    "avg"
-                                ]
+                                aiperf_result["output_token_throughput"]["avg"]
                                 / num_gpus
                             )
 
diff --git a/benchmarks/profiler/utils/profile_decode.py b/benchmarks/profiler/utils/profile_decode.py
index 1a9cbf3d96fa..f0a819ec5dec 100644
--- a/benchmarks/profiler/utils/profile_decode.py
+++ b/benchmarks/profiler/utils/profile_decode.py
@@ -124,10 +124,8 @@ def get_itl_and_thpt_per_gpu(isl, osl, num_request):
             base_url=url,
         )
         if aiperf_result is not None:
-            itl = aiperf_result["records"]["inter_token_latency"]["avg"]
-            thpt_per_gpu = (
-                aiperf_result["records"]["output_token_throughput"]["avg"] / num_gpus
-            )
+            itl = aiperf_result["inter_token_latency"]["avg"]
+            thpt_per_gpu = aiperf_result["output_token_throughput"]["avg"] / num_gpus
             return itl, thpt_per_gpu
         return None, None
 
diff --git a/benchmarks/profiler/utils/profile_prefill.py b/benchmarks/profiler/utils/profile_prefill.py
index d7f5dae91bf0..48171bdd7e63 100644
--- a/benchmarks/profiler/utils/profile_prefill.py
+++ b/benchmarks/profiler/utils/profile_prefill.py
@@ -90,7 +90,7 @@ def get_ttft(isl):
             base_url=url,
         )
         if aiperf_result is not None:
-            return aiperf_result["records"]["ttft"]["avg"]
+            return aiperf_result["time_to_first_token"]["avg"]
         return None
 
     return _profile_prefill_helper(
diff --git a/benchmarks/router/README.md b/benchmarks/router/README.md
index 8ea830b759ac..40d8f127fd6b 100644
--- a/benchmarks/router/README.md
+++ b/benchmarks/router/README.md
@@ -13,7 +13,7 @@ This directory contains scripts for benchmarking the Dynamo router with prefix c
 - etcd and NATS running (required for Dynamo coordination)
 - Required Python packages:
   - `dynamo` package (with vllm and frontend modules)
-  - `genai-perf` for benchmarking
+  - `aiperf` for benchmarking
   - `matplotlib` for plotting results
   - `data-generator` package (install with `pip install -e ./benchmarks` from repo root)
 
@@ -230,11 +230,11 @@ python real_data_benchmark.py --input-dataset trace.jsonl --prefix-root-multipli
 ```
 
 > [!Note]
-> At the time of writing this documentation, you may need to install the latest genai-perf from the main source branch to loadgen on the trace files:
+> At the time of writing this documentation, you may need to install the latest aiperf from the main source branch to generate load from the trace files:
 > ```bash
-> pip install git+https://github.com/triton-inference-server/perf_analyzer.git#subdirectory=genai-perf
+> pip install git+https://github.com/ai-dynamo/aiperf.git
 > ```
-> However, by the time of release, the genai-perf version included in the vLLM runtime container should be up to date enough to use as-is.
+> However, by the time of release, the aiperf version included in the vLLM runtime container should be up to date enough to use as-is.
 
 ## Troubleshooting
 
diff --git a/benchmarks/sin_load_generator/README.md b/benchmarks/sin_load_generator/README.md
index 7c3ec5cf303a..82b7dee5b9c5 100644
--- a/benchmarks/sin_load_generator/README.md
+++ b/benchmarks/sin_load_generator/README.md
@@ -5,7 +5,7 @@ SPDX-License-Identifier: Apache-2.0
 
 # Sinusoidal Load Generator
 
-`sin_synth.py` is a simple script to generate synthetic load with sinusoidal request rate and isl/osl ratio. The output is in [mooncake-style](https://github.com/kvcache-ai/Mooncake) jsonl format, which can be directly used in [GenAI-Perf](https://github.com/triton-inference-server/perf_analyzer/tree/main/genai-perf/genai_perf).
+`sin_synth.py` is a simple script to generate synthetic load with sinusoidal request rate and isl/osl ratio. The output is in [mooncake-style](https://github.com/kvcache-ai/Mooncake) jsonl format, which can be directly used in [AIPerf](https://github.com/ai-dynamo/aiperf).
 
 ## Usage
 
diff --git a/docs/backends/trtllm/gpt-oss.md b/docs/backends/trtllm/gpt-oss.md
index 6a11712724ae..071b88bb2e2d 100644
--- a/docs/backends/trtllm/gpt-oss.md
+++ b/docs/backends/trtllm/gpt-oss.md
@@ -402,9 +402,9 @@ curl localhost:8000/v1/chat/completions   -H "Content-Type: application/json"
 ```
 ## Benchmarking
 
-### Performance Testing with GenAI-Perf
+### Performance Testing with AIPerf
 
-The Dynamo container includes [GenAI-Perf](https://docs.nvidia.com/deeplearning/triton-inference-server/user-guide/docs/perf_analyzer/genai-perf/README.html), NVIDIA's tool for benchmarking generative AI models. This tool helps measure throughput, latency, and other performance metrics for your deployment.
+The Dynamo container includes [AIPerf](https://github.com/ai-dynamo/aiperf/tree/main?tab=readme-ov-file#aiperf), NVIDIA's tool for benchmarking generative AI models. This tool helps measure throughput, latency, and other performance metrics for your deployment.
 
 **Run the following benchmark from inside the container** (after completing the deployment steps above):
 
@@ -413,7 +413,7 @@ The Dynamo container includes [GenAI-Perf](https://docs.nvidia.com/deeplearning/
 mkdir -p /tmp/benchmark-results
 
 # Run the benchmark - this command tests the deployment with high-concurrency synthetic workload
-genai-perf profile \
+aiperf profile \
     --model openai/gpt-oss-120b \
     --tokenizer /model \
     --endpoint-type chat \
@@ -434,9 +434,7 @@ genai-perf profile \
     --num-dataset-entries 8000 \
     --random-seed 100 \
     --artifact-dir /tmp/benchmark-results \
-    -- \
     -v \
-    --max-threads 500 \
     -H 'Authorization: Bearer NOT USED' \
     -H 'Accept: text/event-stream'
 ```
@@ -457,13 +455,13 @@ Key parameters you can adjust:
 - `--output-tokens-mean`: Average output length (tests decode throughput)
 - `--request-count`: Total number of requests for the benchmark
 
-### Installing GenAI-Perf Outside the Container
+### Installing AIPerf Outside the Container
 
 If you prefer to run benchmarks from outside the container:
 
 ```bash
-# Install GenAI-Perf
-pip install genai-perf
+# Install AIPerf
+pip install aiperf
 
 # Then run the same benchmark command, adjusting the tokenizer path if needed
 ```
@@ -520,4 +518,4 @@ flowchart TD
 - **Production Deployment**: For multi-node deployments, see the [Multi-node Guide](../../../examples/basics/multinode/README.md)
 - **Advanced Configuration**: Explore TensorRT-LLM engine building options for further optimization
 - **Monitoring**: Set up Prometheus and Grafana for production monitoring
-- **Performance Benchmarking**: Use GenAI-Perf to measure and optimize your deployment performance
+- **Performance Benchmarking**: Use AIPerf to measure and optimize your deployment performance
diff --git a/recipes/gpt-oss-120b/trtllm/agg/perf.yaml b/recipes/gpt-oss-120b/trtllm/agg/perf.yaml
index eed5d69addbf..a1dbbd696aba 100644
--- a/recipes/gpt-oss-120b/trtllm/agg/perf.yaml
+++ b/recipes/gpt-oss-120b/trtllm/agg/perf.yaml
@@ -57,19 +57,20 @@ spec:
             aiperf profile --artifact-dir $ARTIFACT_DIR \
                 --model $TARGET_MODEL \
                 --tokenizer /model-cache/hub/models--openai--gpt-oss-120b/snapshots/b5c939de8f754692c1647ca79fbf85e8c1e70f8a  \
-                --endpoint-type chat  --endpoint /v1/chat/completions \
+                --endpoint-type chat  \
+                --endpoint /v1/chat/completions \
                 --streaming \
                 --url http://$ENDPOINT \
                 --synthetic-input-tokens-mean $isl \
                 --synthetic-input-tokens-stddev 0 \
                 --output-tokens-mean $osl \
                 --output-tokens-stddev 0 \
-                --extra-inputs "{\"max_tokens\":$osl}" \
-                --extra-inputs "{\"min_tokens\":$osl}" \
-                --extra-inputs "{\"ignore_eos\":true}" \
+                --extra-inputs "max_tokens:$osl" \
+                --extra-inputs "min_tokens:$osl" \
+                --extra-inputs "ignore_eos:true" \
                 --extra-inputs "{\"nvext\":{\"ignore_eos\":true}}" \
-                --extra-inputs "{\"repetition_penalty\":1.0}" \
-                --extra-inputs "{\"temperature\": 0.0}" \
+                --extra-inputs "repetition_penalty:1.0" \
+                --extra-inputs "temperature: 0.0" \
                 --concurrency $concurrency \
                 --request-count $((10*concurrency)) \
                 --warmup-request-count $concurrency \
diff --git a/recipes/llama-3-70b/vllm/agg/perf.yaml b/recipes/llama-3-70b/vllm/agg/perf.yaml
index 8c5a470f119c..5773214bf438 100644
--- a/recipes/llama-3-70b/vllm/agg/perf.yaml
+++ b/recipes/llama-3-70b/vllm/agg/perf.yaml
@@ -50,7 +50,8 @@ spec:
             aiperf profile --artifact-dir $ARTIFACT_DIR \
                 --model $TARGET_MODEL \
                 --tokenizer /root/.cache/huggingface/hub/models--RedHatAI--Llama-3.3-70B-Instruct-FP8-dynamic/snapshots/ddb4128556dfcff99e0c41aee159ea6c3e655dcd  \
-                --endpoint-type chat  --endpoint /v1/chat/completions \
+                --endpoint-type chat  \
+                --endpoint /v1/chat/completions \
                 --streaming \
                 --url http://$ENDPOINT \
                 --synthetic-input-tokens-mean $isl \
diff --git a/recipes/llama-3-70b/vllm/disagg-multi-node/perf.yaml b/recipes/llama-3-70b/vllm/disagg-multi-node/perf.yaml
index e2326e45873b..8b24296f828b 100644
--- a/recipes/llama-3-70b/vllm/disagg-multi-node/perf.yaml
+++ b/recipes/llama-3-70b/vllm/disagg-multi-node/perf.yaml
@@ -50,7 +50,8 @@ spec:
             aiperf profile --artifact-dir $ARTIFACT_DIR \
                 --model $TARGET_MODEL \
                 --tokenizer /root/.cache/huggingface/hub/models--RedHatAI--Llama-3.3-70B-Instruct-FP8-dynamic/snapshots/ddb4128556dfcff99e0c41aee159ea6c3e655dcd  \
-                --endpoint-type chat  --endpoint /v1/chat/completions \
+                --endpoint-type chat  \
+                --endpoint /v1/chat/completions \
                 --streaming \
                 --url http://$ENDPOINT \
                 --synthetic-input-tokens-mean $isl \
diff --git a/recipes/llama-3-70b/vllm/disagg-single-node/perf.yaml b/recipes/llama-3-70b/vllm/disagg-single-node/perf.yaml
index 61e53aa5a79e..c2ac8445c589 100644
--- a/recipes/llama-3-70b/vllm/disagg-single-node/perf.yaml
+++ b/recipes/llama-3-70b/vllm/disagg-single-node/perf.yaml
@@ -50,7 +50,8 @@ spec:
             aiperf profile --artifact-dir $ARTIFACT_DIR \
                 --model $TARGET_MODEL \
                 --tokenizer /root/.cache/huggingface/hub/models--RedHatAI--Llama-3.3-70B-Instruct-FP8-dynamic/snapshots/ddb4128556dfcff99e0c41aee159ea6c3e655dcd  \
-                --endpoint-type chat  --endpoint /v1/chat/completions \
+                --endpoint-type chat  \
+                --endpoint /v1/chat/completions \
                 --streaming \
                 --url http://$ENDPOINT \
                 --synthetic-input-tokens-mean $isl \
diff --git a/tests/fault_tolerance/deploy/client.py b/tests/fault_tolerance/deploy/client.py
index e3053008c43b..e8a6ec24c231 100644
--- a/tests/fault_tolerance/deploy/client.py
+++ b/tests/fault_tolerance/deploy/client.py
@@ -383,27 +383,19 @@ def log_summary_metrics(
             with open(profile_json) as f:
                 metrics = json.load(f)
 
-            # Extract key metrics from AI-Perf format
-            records = metrics.get("records", {})
-
-            # Request count from request_count record
-            request_count_record = records.get("request_count", {})
-            request_count = (
-                int(request_count_record.get("avg", 0)) if request_count_record else 0
-            )
+            # Request count
+            request_count = int(metrics.get("request_count", {}).get("avg", 0))
 
             # Check for errors
-            error_summary = metrics.get("error_summary", [])
-            error_count = len(error_summary)
+            error_count = len(metrics.get("error_summary", []))
 
             # Latency metrics (in milliseconds)
-            request_latency = records.get("request_latency", {})
+            request_latency = metrics.get("request_latency", {})
             avg_latency = request_latency.get("avg", 0) / 1000.0  # Convert to seconds
             p99_latency = request_latency.get("p99", 0) / 1000.0  # Convert to seconds
 
             # Throughput metrics
-            request_throughput = records.get("request_throughput", {})
-            throughput = request_throughput.get("avg", 0)
+            throughput = metrics.get("request_throughput", {}).get("avg", 0)
 
             # Log summary
             logger.info(
@@ -417,7 +409,7 @@ def log_summary_metrics(
 
             # Log success rate
             if request_count > 0:
-                success_rate = (request_count - error_count) / request_count * 100
+                success_rate = ((request_count - error_count) / request_count) * 100
                 logger.info(f"Success rate: {success_rate:.1f}%")
 
             # Also write summary to CSV file for aggregation
diff --git a/tests/fault_tolerance/deploy/parse_results.py b/tests/fault_tolerance/deploy/parse_results.py
index e41275cd44c7..77f28894d31f 100644
--- a/tests/fault_tolerance/deploy/parse_results.py
+++ b/tests/fault_tolerance/deploy/parse_results.py
@@ -293,63 +293,44 @@ def parse_aiperf_client_results(log_dir: str) -> Dict[str, Any]:
                 with open(profile_json) as f:
                     client_metrics = json.load(f)
 
-                # AI-Perf format has "records" dictionary at the top level
-                records = client_metrics.get("records", {})
-
-                # Extract request count (this is the total requests made)
-                request_count_record = records.get("request_count", {})
-                request_count = (
-                    int(request_count_record.get("avg", 0))
-                    if request_count_record
-                    else 0
+                # Extract request count (this is the total successful requests made)
+                request_count = int(
+                    client_metrics.get("request_count", {}).get("avg", 0)
                 )
 
                 # Check for errors in error_summary
-                error_summary = client_metrics.get("error_summary", [])
-                error_count = len(error_summary)
+                error_count = len(client_metrics.get("error_summary", []))
 
                 # Check if test was cancelled
-                was_cancelled = client_metrics.get("was_cancelled", False)
-                if was_cancelled:
+                if client_metrics.get("was_cancelled", False):
                     error_count = request_count  # Mark all as failed if cancelled
 
                 all_metrics["total_requests"] += request_count
                 all_metrics["successful_requests"] += request_count - error_count
                 all_metrics["failed_requests"] += error_count
 
-                # Extract latency from request_latency record
-                request_latency = records.get("request_latency", {})
-
+                # Extract latency metrics
+                request_latency = client_metrics.get("request_latency", None)
                 if request_latency:
-                    # Convert milliseconds to seconds for consistency
-                    if "avg" in request_latency:
-                        all_metrics["latencies"].append(request_latency["avg"] / 1000.0)
-                    if "p50" in request_latency:
-                        all_metrics["p50_latencies"].append(
-                            request_latency["p50"] / 1000.0
-                        )
-                    if "p90" in request_latency:
-                        all_metrics["p90_latencies"].append(
-                            request_latency["p90"] / 1000.0
-                        )
-                    if "p99" in request_latency:
-                        all_metrics["p99_latencies"].append(
-                            request_latency["p99"] / 1000.0
-                        )
-
-                # Time to first token (if available in records)
-                ttft = records.get("time_to_first_token", {}) or records.get("ttft", {})
-                if ttft and "avg" in ttft:
-                    all_metrics["ttft"].append(ttft["avg"] / 1000.0)  # Convert ms to s
-
-                # Inter-token latency (if available in records)
-                itl = records.get("inter_token_latency", {}) or records.get("itl", {})
-                if itl and "avg" in itl:
-                    all_metrics["itl"].append(itl["avg"] / 1000.0)  # Convert ms to s
+                    all_metrics["latencies"].append(request_latency["avg"] / 1000.0)
+                    all_metrics["p50_latencies"].append(request_latency["p50"] / 1000.0)
+                    all_metrics["p90_latencies"].append(request_latency["p90"] / 1000.0)
+                    all_metrics["p99_latencies"].append(request_latency["p99"] / 1000.0)
+
+                # Time to first token
+                ttft = client_metrics.get("time_to_first_token", {}).get("avg", None)
+                if ttft:
+                    all_metrics["ttft"].append(ttft / 1000.0)  # Convert ms to s
+
+                # Inter-token latency
+                itl = client_metrics.get("inter_token_latency", {}).get("avg", None)
+                if itl:
+                    all_metrics["itl"].append(itl / 1000.0)  # Convert ms to s
 
                 # Throughput from request_throughput record
-                request_throughput = records.get("request_throughput", {})
-                req_throughput = request_throughput.get("avg", 0)
+                req_throughput = client_metrics.get("request_throughput", {}).get(
+                    "avg", 0
+                )
                 if req_throughput:
                     all_metrics["throughputs"].append(req_throughput)
 
diff --git a/tests/planner/README.md b/tests/planner/README.md
index 4c1566cc1beb..e9fdcbe44372 100644
--- a/tests/planner/README.md
+++ b/tests/planner/README.md
@@ -215,10 +215,10 @@ When running deployment with sla-planner, to reduce the image pulling time, depl
 kubectl apply -f ./perf_test_configs/image_cache_daemonset.yaml -n <namespace>
 ```
 
-Then, port-forward or shell into the frontend pod and run GenAI-Perf to get the goodput:
+Then, port-forward or shell into the frontend pod and run AIPerf to get the goodput:
 
 ```bash
-genai-perf profile \
+aiperf profile \
   --model nvidia/Llama-3.1-8B-Instruct-FP8 \
   --tokenizer nvidia/Llama-3.1-8B-Instruct-FP8 \
   --endpoint-type chat \
@@ -227,11 +227,11 @@ genai-perf profile \
   --input-file payload:/workspace/rr-5-45_i3000o300.jsonl \ # path to the generated load dataset \
   --fixed-schedule True \
   --goodput time_to_first_token:200 inter_token_latency:10 \
-  -- -v -max-threads 64 \
+  -v
 ```
 
 > [!NOTE]
-> Sometimes, when sla planner scales down the number of workers, a few requests will error out and cause GenAI-Perf to stuck. We are aware of this issue and are working on fixing it.
+> Sometimes, when the SLA planner scales down the number of workers, a few requests will error out and cause AIPerf to get stuck. We are aware of this issue and are working on fixing it.
 
 #### E2E Perf Test Results
 
diff --git a/tests/planner/utils/load_generator.py b/tests/planner/utils/load_generator.py
index 69943de09411..6ac5e38a76d0 100644
--- a/tests/planner/utils/load_generator.py
+++ b/tests/planner/utils/load_generator.py
@@ -4,7 +4,7 @@
 """
 Load generation script for SLA planner scaling tests.
 
-This script uses genai-perf to generate load at specific request rates
+This script uses aiperf to generate load at specific request rates
 to test the planner's scaling behavior.
 """
 
@@ -24,7 +24,7 @@
 
 
 class LoadGenerator:
-    """Generate load using genai-perf to test planner scaling."""
+    """Generate load using aiperf to test planner scaling."""
 
     def __init__(
         self,
@@ -40,12 +40,12 @@ def __init__(
         self.osl = osl
         self.save_results = save_results
 
-    def _calculate_genai_perf_params(
+    def _calculate_aiperf_params(
         self,
         req_per_sec: float,
     ) -> Dict[str, Any]:
         """
-        Calculate genai-perf parameters to approximate desired request rate.
+        Calculate aiperf parameters to approximate desired request rate.
 
         Args:
             req_per_sec: Desired requests per second
@@ -71,15 +71,15 @@ async def generate_load(
         Args:
             req_per_sec: Target requests per second
             duration_sec: Duration to generate load (seconds)
-            artifact_dir: Directory to store genai-perf artifacts
+            artifact_dir: Directory to store aiperf artifacts
 
         Returns:
             Dictionary with load test results
         """
         logger.info(f"Generating load: {req_per_sec} req/s for {duration_sec}s")
 
-        # Calculate genai-perf parameters
-        params = self._calculate_genai_perf_params(req_per_sec)
+        # Calculate aiperf parameters
+        params = self._calculate_aiperf_params(req_per_sec)
         logger.info(f"Using request_rate={params['request_rate']} req/s")
 
         # Create artifact directory if not provided
@@ -95,9 +95,9 @@ async def generate_load(
             f"Adjusted parameters: duration={duration_sec}s, request_count={request_count}"
         )
 
-        # Build genai-perf command based on coworker's successful approach
+        # Build aiperf command based on coworker's successful approach
         cmd = [
-            "genai-perf",
+            "aiperf",
             "profile",
             "--model",
             self.model,
@@ -116,18 +116,13 @@ async def generate_load(
             str(params["request_rate"]),
             "--request-count",
             str(request_count),  # Use request count to limit test duration
-            "--stability-percentage",
-            "50",
             "--num-dataset-entries",
             str(
                 max(20, int(params["request_rate"] * 10))
             ),  # Generate reasonable dataset size
             "--artifact-dir",
             artifact_dir,
-            "--",
             "-v",
-            "-max-threads",
-            "64",
         ]
 
         logger.info(f"Running command: {' '.join(cmd)}")
@@ -135,7 +130,7 @@ async def generate_load(
             f"Expected duration: {duration_sec}s, timeout: {max(duration_sec * 2 + 120, int(duration_sec * 2.5))}s"
         )
 
-        # Run genai-perf (async)
+        # Run aiperf (async)
         start_time = time.time()
         # More generous timeout for high-load tests - allow 2x duration + 2 minutes buffer
         timeout = max(duration_sec * 2 + 120, int(duration_sec * 2.5))
@@ -152,7 +147,7 @@ async def generate_load(
             except asyncio.TimeoutError:
                 proc.kill()
                 await proc.communicate()
-                logger.error("genai-perf timed out")
+                logger.error("aiperf timed out")
                 raise RuntimeError("Load generation timed out")
 
             end_time = time.time()
@@ -160,13 +155,9 @@ async def generate_load(
 
             # Persist logs for debugging
             try:
-                with open(
-                    os.path.join(artifact_dir, "genai_perf.stdout.log"), "wb"
-                ) as f:
+                with open(os.path.join(artifact_dir, "aiperf.stdout.log"), "wb") as f:
                     f.write(stdout or b"")
-                with open(
-                    os.path.join(artifact_dir, "genai_perf.stderr.log"), "wb"
-                ) as f:
+                with open(os.path.join(artifact_dir, "aiperf.stderr.log"), "wb") as f:
                     f.write(stderr or b"")
             except Exception:
                 pass
@@ -174,31 +165,31 @@ async def generate_load(
             if proc.returncode == 0:
                 logger.info("Load generation completed successfully")
                 logger.info(f"Actual duration: {actual_duration:.2f}s")
-                results = self._parse_genai_perf_results(artifact_dir)
+                results = self._parse_aiperf_results(artifact_dir)
                 results.update(
                     {
                         "requested_req_per_sec": req_per_sec,
                         "actual_duration": actual_duration,
                         "target_duration": duration_sec,
-                        "genai_perf_params": params,
+                        "aiperf_params": params,
                         "artifact_dir": artifact_dir,
                         "success": True,
                     }
                 )
                 return results
             else:
-                logger.error(f"genai-perf failed with return code {proc.returncode}")
-                raise RuntimeError("genai-perf failed; see logs in artifact dir")
+                logger.error(f"aiperf failed with return code {proc.returncode}")
+                raise RuntimeError("aiperf failed; see logs in artifact dir")
         except RuntimeError:
             raise
         except Exception as e:
-            logger.error(f"genai-perf execution error: {e}")
+            logger.error(f"aiperf execution error: {e}")
             raise
 
-    def _parse_genai_perf_results(self, artifact_dir: str) -> Dict[str, Any]:
-        """Parse genai-perf results from artifact directory."""
+    def _parse_aiperf_results(self, artifact_dir: str) -> Dict[str, Any]:
+        """Parse aiperf results from artifact directory."""
         try:
-            # Look for the profile_export_genai_perf.json file
+            # Look for the profile_export_aiperf.json file
             json_files = [f for f in os.listdir(artifact_dir) if f.endswith(".json")]
             if not json_files:
                 logger.warning("No JSON results found in artifact directory")
@@ -207,7 +198,7 @@ def _parse_genai_perf_results(self, artifact_dir: str) -> Dict[str, Any]:
             # Main results file
             results_file = None
             for json_file in json_files:
-                if "profile_export" in json_file or "genai_perf" in json_file:
+                if "profile_export" in json_file or "aiperf" in json_file:
                     results_file = os.path.join(artifact_dir, json_file)
                     break
 
@@ -217,40 +208,21 @@ def _parse_genai_perf_results(self, artifact_dir: str) -> Dict[str, Any]:
             logger.info(f"Parsing results from: {results_file}")
 
             with open(results_file, "r") as f:
-                data = json.load(f)
-
-            results = {}
-            if "experiments" in data and data["experiments"]:
-                exp = data["experiments"][0]
-                if "perf_metrics" in exp:
-                    metrics = exp["perf_metrics"]
-                    results.update(
-                        {
-                            "throughput": metrics.get("throughput", {}).get("avg", 0),
-                            "ttft_mean": metrics.get("ttft", {}).get("avg", 0),
-                            "itl_mean": metrics.get("inter_token_latency", {}).get(
-                                "avg", 0
-                            ),
-                            "end_to_end_latency_mean": metrics.get(
-                                "request_latency", {}
-                            ).get("avg", 0),
-                        }
-                    )
-            if not results and "profile_export_genai_perf" in data:
-                summary = data.get("summary", {})
-                results.update(
-                    {
-                        "throughput": summary.get("throughput", 0),
-                        "ttft_mean": summary.get("time_to_first_token_ms", 0),
-                        "itl_mean": summary.get("inter_token_latency_ms", 0),
-                    }
-                )
-
+                metrics = json.load(f)
+
+            results = {
+                "throughput": metrics.get("output_token_throughput", {}).get("avg", 0),
+                "ttft_mean": metrics.get("time_to_first_token", {}).get("avg", 0),
+                "itl_mean": metrics.get("inter_token_latency", {}).get("avg", 0),
+                "end_to_end_latency_mean": metrics.get("request_latency", {}).get(
+                    "avg", 0
+                ),
+            }
             logger.info(f"Parsed results: {results}")
             return results
 
         except Exception as e:
-            logger.warning(f"Failed to parse genai-perf results: {e}")
+            logger.warning(f"Failed to parse aiperf results: {e}")
             return {}
 
     async def run_scaling_test(self) -> Dict[str, Any]: