Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -178,7 +178,7 @@ Rerun with `curl -N` and change `stream` in the request to `true` to get the res

Dynamo provides comprehensive benchmarking tools to evaluate and optimize your deployments:

- **[Benchmarking Guide](docs/benchmarks/benchmarking.md)** – Compare deployment topologies (aggregated vs. disaggregated vs. vanilla vLLM) using GenAI-Perf
- **[Benchmarking Guide](docs/benchmarks/benchmarking.md)** – Compare deployment topologies (aggregated vs. disaggregated vs. vanilla vLLM) using AIPerf
- **[Pre-Deployment Profiling](docs/benchmarks/pre_deployment_profiling.md)** – Optimize configurations before deployment to meet SLA requirements

# Engines
Expand Down
2 changes: 1 addition & 1 deletion benchmarks/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@

# Benchmarks

This directory contains benchmarking scripts and tools for performance evaluation of Dynamo deployments. The benchmarking framework is a wrapper around genai-perf that makes it easy to benchmark DynamoGraphDeployments or other deployments with exposed endpoints.
This directory contains benchmarking scripts and tools for performance evaluation of Dynamo deployments. The benchmarking framework is a wrapper around AIPerf that makes it easy to benchmark DynamoGraphDeployments or other deployments with exposed endpoints.

## Quick Start

Expand Down
4 changes: 2 additions & 2 deletions benchmarks/llm/perf.sh
Original file line number Diff line number Diff line change
Expand Up @@ -202,7 +202,7 @@ if [ $index -gt 0 ]; then
echo "--------------------------------"
fi

echo "Running genai-perf with:"
echo "Running aiperf with:"
echo "Model: $model"
echo "ISL: $isl"
echo "OSL: $osl"
Expand All @@ -214,7 +214,7 @@ for concurrency in "${concurrency_array[@]}"; do

# NOTE: For Dynamo HTTP OpenAI frontend, use `nvext` for fields like
# `ignore_eos` since they are not in the official OpenAI spec.
genai-perf profile \
aiperf profile \
--model ${model} \
--tokenizer ${model} \
--endpoint-type chat \
Expand Down
36 changes: 17 additions & 19 deletions benchmarks/llm/plot_pareto.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,23 +26,21 @@


def get_json_paths(search_paths):
    """Collect AIPerf profile exports and their matching deployment configs.

    Every directory in ``search_paths`` must contain a
    ``deployment_config.json`` at its top level. The directory tree below it
    is walked, and for each ``profile_export_aiperf.json`` found, the export
    path and that directory's deployment config path are appended to two
    parallel lists.

    Args:
        search_paths: Iterable of artifact directory paths to search.

    Returns:
        A tuple ``(aiperf_profile_export_json_paths,
        deployment_config_json_paths)`` of equal-length lists; entry ``i`` of
        the second list is the deployment config that belongs to entry ``i``
        of the first.

    Raises:
        FileNotFoundError: If a search path has no ``deployment_config.json``.
            (A subclass of ``Exception``, so existing ``except Exception``
            handlers keep working.)
    """
    aiperf_profile_export_json_paths = []
    deployment_config_json_paths = []
    for search_path in search_paths:
        deployment_config_json_path = os.path.join(
            search_path, "deployment_config.json"
        )
        if not os.path.exists(deployment_config_json_path):
            raise FileNotFoundError(
                f"deployment_config.json not found in {search_path}"
            )
        for root, _, files in os.walk(search_path):
            for file in files:
                if file == "profile_export_aiperf.json":
                    aiperf_profile_export_json_paths.append(os.path.join(root, file))
                    # Append the config once per export so the two lists stay
                    # index-aligned for the caller's zip().
                    deployment_config_json_paths.append(deployment_config_json_path)

    return aiperf_profile_export_json_paths, deployment_config_json_paths


# search for -concurrency<number> in the name
Expand Down Expand Up @@ -81,13 +79,13 @@ def parse_kind_and_mode(deployment_config_json_path):


def extract_val_and_concurrency(
genai_perf_profile_export_json_paths, deployment_config_json_paths, stat_value="avg"
aiperf_profile_export_json_paths, deployment_config_json_paths, stat_value="avg"
):
results = []
for genai_perf_profile_export_json_path, deployment_config_json_path in zip(
genai_perf_profile_export_json_paths, deployment_config_json_paths
for aiperf_profile_export_json_path, deployment_config_json_path in zip(
aiperf_profile_export_json_paths, deployment_config_json_paths
):
with open(genai_perf_profile_export_json_path, "r") as f:
with open(aiperf_profile_export_json_path, "r") as f:
data = json.load(f)
# output_token_throughput contains only avg
output_token_throughput = data.get("output_token_throughput", {}).get("avg")
Expand All @@ -99,7 +97,7 @@ def extract_val_and_concurrency(
# request_throughput contains only avg
request_throughput = data.get("request_throughput", {}).get("avg")

concurrency = parse_concurrency(genai_perf_profile_export_json_path)
concurrency = parse_concurrency(aiperf_profile_export_json_path)
num_gpus = parse_gpus(deployment_config_json_path)
kind, mode = parse_kind_and_mode(deployment_config_json_path)

Expand All @@ -116,7 +114,7 @@ def extract_val_and_concurrency(

results.append(
{
"configuration": genai_perf_profile_export_json_path,
"configuration": aiperf_profile_export_json_path,
"kind": kind,
"mode": mode,
"num_gpus": num_gpus,
Expand Down Expand Up @@ -241,12 +239,12 @@ def pareto_efficient(ids, points):
import os

parser = argparse.ArgumentParser(
description="Plot Pareto graph from GenAI-Perf artifacts"
description="Plot Pareto graph from AIPerf artifacts"
)
parser.add_argument(
"--artifacts-root-dir",
required=True,
help="Root directory containing artifact directories to search for profile_export_genai_perf.json files",
help="Root directory containing artifact directories to search for profile_export_aiperf.json files",
)
parser.add_argument(
"--title",
Expand All @@ -260,16 +258,16 @@ def pareto_efficient(ids, points):
if not artifacts_dirs:
raise ValueError(f"No artifacts directories found in {args.artifacts_root_dir}")

genai_perf_profile_export_json_paths, deployment_config_json_paths = get_json_paths(
aiperf_profile_export_json_paths, deployment_config_json_paths = get_json_paths(
artifacts_dirs
)

if len(genai_perf_profile_export_json_paths) != len(deployment_config_json_paths):
if len(aiperf_profile_export_json_paths) != len(deployment_config_json_paths):
raise ValueError(
f"Number of genai_perf_profile_export_json_paths ({len(genai_perf_profile_export_json_paths)}) does not match number of deployment_config_json_paths ({len(deployment_config_json_paths)})"
f"Number of aiperf_profile_export_json_paths ({len(aiperf_profile_export_json_paths)}) does not match number of deployment_config_json_paths ({len(deployment_config_json_paths)})"
)

extracted_values = extract_val_and_concurrency(
genai_perf_profile_export_json_paths, deployment_config_json_paths
aiperf_profile_export_json_paths, deployment_config_json_paths
)
create_pareto_graph(extracted_values, title=args.title)
4 changes: 2 additions & 2 deletions benchmarks/profiler/profile_endpoint.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,8 +5,8 @@
import logging
import os

from utils.profile_decode import profile_decode
from utils.profile_prefill import profile_prefill
from benchmarks.profiler.utils.profile_decode import profile_decode
from benchmarks.profiler.utils.profile_prefill import profile_prefill

logger = logging.getLogger(__name__)
logger.setLevel(logging.INFO)
Expand Down
29 changes: 16 additions & 13 deletions benchmarks/profiler/profile_sla.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,13 +22,13 @@
import numpy as np
import yaml

from benchmarks.profiler.utils.aiperf import benchmark_decode, benchmark_prefill
from benchmarks.profiler.utils.config import (
CONFIG_MODIFIERS,
WORKER_COMPONENT_NAMES,
generate_dgd_config_with_planner,
)
from benchmarks.profiler.utils.estimate_perf import AIConfiguratorPerfEstimator
from benchmarks.profiler.utils.genai_perf import benchmark_decode, benchmark_prefill
from benchmarks.profiler.utils.planner_utils import add_planner_arguments_to_parser
from benchmarks.profiler.utils.plot import (
plot_decode_performance,
Expand Down Expand Up @@ -245,18 +245,18 @@ async def run_profile(args):
f"Logs have been saved to {client.base_log_dir / client.deployment_name}"
)

# run genai-perf
# run aiperf
base_url = client.get_service_url()
genai_perf_artifact_dir = f"{work_dir}/gap_isl{args.isl}"
gap_result = benchmark_prefill(
ai_perf_artifact_dir = f"{work_dir}/aiperf_isl{args.isl}"
aiperf_result = benchmark_prefill(
args.isl,
genai_perf_artifact_dir,
ai_perf_artifact_dir,
model_name,
model_name,
base_url=base_url,
)
if gap_result is not None:
ttft = gap_result["time_to_first_token"]["avg"]
if aiperf_result is not None:
ttft = aiperf_result["records"]["ttft"]["avg"]

logger.info("Cleaning up deployment...")
await client.delete_deployment()
Expand Down Expand Up @@ -424,20 +424,23 @@ async def run_profile(args):
)
else:
base_url = client.get_service_url()
genai_perf_artifact_dir = f"{work_dir}/gap_request{num_request}_isl{args.isl}_osl{args.osl}_n{num_request}"
gap_result = benchmark_decode(
ai_perf_artifact_dir = f"{work_dir}/aiperf_request{num_request}_isl{args.isl}_osl{args.osl}_n{num_request}"
aiperf_result = benchmark_decode(
args.isl,
args.osl,
num_request,
genai_perf_artifact_dir,
ai_perf_artifact_dir,
model_name,
model_name,
base_url=base_url,
)
if gap_result is not None:
itl = gap_result["inter_token_latency"]["avg"]
if aiperf_result is not None:
itl = aiperf_result["records"]["inter_token_latency"]["avg"]
thpt_per_gpu = (
gap_result["output_token_throughput"]["avg"] / num_gpus
aiperf_result["records"]["output_token_throughput"][
"avg"
]
/ num_gpus
)

if itl is not None and thpt_per_gpu is not None:
Expand Down
Loading
Loading