Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
112 commits
Select commit Hold shift + click to select a range
b81b882
[Config Refactor][2/N] PipelineConfig + DeployConfig dataclasses + qw…
lishunyang12 Mar 31, 2026
be79bf6
Wire pipeline registry + deploy config for qwen3_omni, delete legacy …
lishunyang12 Mar 31, 2026
6845cd0
Fix stale qwen3_omni doc references
lishunyang12 Mar 31, 2026
0101625
Add --deploy-config, --stage-overrides CLI args and wire e2e
lishunyang12 Mar 31, 2026
df7adff
Align to final RFC design: model_arch→pipeline, connectors→deploy, sa…
lishunyang12 Mar 31, 2026
59102e3
Clean up: trim docstrings/comments, merge tests into test_config_factory
lishunyang12 Mar 31, 2026
a9f1c57
Remove RFC/name references from comments
lishunyang12 Mar 31, 2026
1195d35
Add TODOs for follow-up migration work
lishunyang12 Mar 31, 2026
eac199f
Add @lishunyang12 to TODOs
lishunyang12 Mar 31, 2026
5956f0d
Strip remaining comments from stage_config.py
lishunyang12 Mar 31, 2026
1f9e080
Strip remaining comments
lishunyang12 Mar 31, 2026
e368a7b
Refactor merge_pipeline_deploy: use asdict loop instead of field-by-f…
lishunyang12 Mar 31, 2026
20020c8
Simplify deploy config parsing and platform overrides
lishunyang12 Mar 31, 2026
d3f1833
Move deploy format detection to engine layer, minimize conftest changes
lishunyang12 Mar 31, 2026
3e7e22b
Add base_config inheritance for deploy YAMLs, slim CI configs to over…
lishunyang12 Mar 31, 2026
3c3417e
Remove default-value fields from main deploy YAML
lishunyang12 Mar 31, 2026
2456286
Add tests for base_config inheritance, platform overrides, CLI flow, …
lishunyang12 Mar 31, 2026
047655f
Fix base_config relative paths in dfx deploy configs
lishunyang12 Mar 31, 2026
8043433
Fix pre-existing test: make MockParallelConfig a dataclass
lishunyang12 Mar 31, 2026
00729ea
Fix: import pipeline module before checking registry
lishunyang12 Mar 31, 2026
bc9442b
Fix device assignment: fall back to flat devices when engine_args is …
lishunyang12 Mar 31, 2026
0726182
Fix OmegaConf serialization: convert dataclasses in cli_overrides
lishunyang12 Mar 31, 2026
52fc87d
Fix: filter argparse defaults from CLI overrides in registry path
lishunyang12 Mar 31, 2026
57e83cf
Fix: only apply per-stage CLI overrides in registry path
lishunyang12 Mar 31, 2026
df29048
Add repro script for online serving config path
lishunyang12 Mar 31, 2026
1f00d04
Address review feedback
lishunyang12 Apr 10, 2026
7ef989e
Move qwen3_omni CI deploy YAMLs to vllm_omni/deploy/ci/
lishunyang12 Apr 10, 2026
cd5d06e
Unify CI deploy YAMLs and fix CLI/YAML precedence
lishunyang12 Apr 10, 2026
8fa180f
Fix pre-commit: collapse wrapped strings in qwen3_omni/pipeline.py
lishunyang12 Apr 10, 2026
3f6fd2d
Add CPU-only smoke scripts for deploy schema and CLI explicit-key par…
lishunyang12 Apr 10, 2026
e2305b3
Add e2e serve smoke script for PR #2383
lishunyang12 Apr 10, 2026
70bc4c8
e2e_serve_smoke: print log snippets during readiness wait
lishunyang12 Apr 10, 2026
3c17d12
Migrate qwen2_5_omni and qwen3_tts to pipeline+deploy schema
lishunyang12 Apr 10, 2026
d277f9d
Update docs and examples to point at new deploy paths
lishunyang12 Apr 10, 2026
f805cd1
Complete refactoring for qwen3_omni_moe / qwen2_5_omni / qwen3_tts
lishunyang12 Apr 10, 2026
95d66c2
Address amy-why-3459 review feedback
lishunyang12 Apr 10, 2026
8eae5b4
Make prefix_caching and async_chunk CLI-overridable
lishunyang12 Apr 10, 2026
5f9abb8
Loosen qwen2_5_omni shared-cuda:0 budgets for flashinfer warmup
lishunyang12 Apr 10, 2026
6e5dd59
Drop qwen2_5_omni stage 0 to 0.48 for token2wav warmup spike
lishunyang12 Apr 10, 2026
a2b34f0
Disable flashinfer autotune on qwen2_5_omni token2wav stage
lishunyang12 Apr 10, 2026
dbb2ba2
Auto-set VLLM_ALLOW_LONG_MAX_MODEL_LEN for stages with explicit max_m…
lishunyang12 Apr 10, 2026
e986a16
Drop multiconnector deploy yamls in favor of base_config overlays
lishunyang12 Apr 10, 2026
1a7246c
Add --pipeline CLI flag to override deploy yaml pipeline selector
lishunyang12 Apr 10, 2026
52a4a5d
Drop qwen3_tts variant yamls in favor of CLI flag composition
lishunyang12 Apr 10, 2026
73c9ab3
Address #2383 review feedback
lishunyang12 Apr 13, 2026
dff7278
Move CI overlays from deploy/ci/*.yaml into tests/utils.py Python dicts
lishunyang12 Apr 13, 2026
b6733e6
Unify qwen3_tts pipeline: dispatch processors from async_chunk bool
lishunyang12 Apr 13, 2026
29c1816
Trim verbose comments
lishunyang12 Apr 13, 2026
466e828
Restore DFX deploy overlays and wire scripts to --deploy-config
lishunyang12 Apr 13, 2026
4b755b0
Restore DeployConfig.pipeline for variant topologies; drop dead qwen3…
lishunyang12 Apr 13, 2026
85aebc3
Collapse DFX tests; drop deploy overlays and obsolete update blocks
lishunyang12 Apr 13, 2026
d5c9fa3
Incorporate #2740 helpers: build_stage_runtime_overrides, strip_paren…
lishunyang12 Apr 13, 2026
e0e7955
Collapse qwen3_tts_bs{1,4,16}.yaml into BATCH_SIZE env + --stage-over…
lishunyang12 Apr 13, 2026
5b5bddb
Move thinker-only test yaml into _CI_OVERLAYS; drop orphan docs example
lishunyang12 Apr 13, 2026
5389122
Trim verbose comments and doc prose
lishunyang12 Apr 13, 2026
8772afa
Restore StageType re-export from vllm_omni.config
lishunyang12 Apr 13, 2026
3654c57
Replace stage CLI blacklists with OrchestratorArgs dataclass
lishunyang12 Apr 14, 2026
792ee10
Auto-discover pipelines; extract parent-args contracts
lishunyang12 Apr 14, 2026
91ad09c
Address reviews: tokenizer guard, direct setattr, qwen3_tts alias
lishunyang12 Apr 14, 2026
7e7e1be
Enable async_chunk by default for qwen2_5_omni
lishunyang12 Apr 14, 2026
7e782c7
Address reviews: JSON parse error, README wording
lishunyang12 Apr 14, 2026
ccb517e
Move lazy imports to top of file
lishunyang12 Apr 14, 2026
3eb75af
Address reviews: docs schema, override example, benchmark cleanup
lishunyang12 Apr 14, 2026
b1c9d1c
Rename qwen3_omni_moe.yaml to qwen3_omni.yaml for consistency
lishunyang12 Apr 14, 2026
ac0bd4e
Revert "Rename qwen3_omni_moe.yaml to qwen3_omni.yaml for consistency"
lishunyang12 Apr 14, 2026
3a3791b
Refactor merge_pipeline_deploy; add BVA tests
lishunyang12 Apr 14, 2026
404c1ed
Share detect_explicit_cli_keys between online and offline entry points
lishunyang12 Apr 15, 2026
6241b68
Pass _cli_explicit_keys in offline examples for the 3 migrated models
lishunyang12 Apr 15, 2026
095d1ea
Add Omni.from_args / AsyncOmni.from_args for error-proof argparse entry
lishunyang12 Apr 15, 2026
71f73c1
Rename from_args to from_cli_args to align with OmniEngineArgs
lishunyang12 Apr 15, 2026
4eb6395
Drop redundant default values from deploy YAMLs
lishunyang12 Apr 15, 2026
f7aab26
Rename arg_classification to arg_routing
lishunyang12 Apr 15, 2026
0e5e9d7
Merge arg_routing into arg_utils
lishunyang12 Apr 15, 2026
baac75e
Remove redundant edges: section from deploy YAMLs
lishunyang12 Apr 15, 2026
ac563e0
Drop more redundant default values from deploy YAMLs
lishunyang12 Apr 15, 2026
e1e6bc6
Keep devices/gpu_memory_utilization/shm_threshold_bytes for deploymen…
lishunyang12 Apr 15, 2026
18e82b6
Promote pipeline-wide settings from per-stage to top-level DeployConfig
lishunyang12 Apr 15, 2026
59a678d
Document implicit defaults and enforce_eager choices in deploy YAMLs
lishunyang12 Apr 15, 2026
78c8c5e
Drop redundant 'Parse X' section comments in initialization.py
lishunyang12 Apr 15, 2026
bf883c3
Revert "Drop redundant 'Parse X' section comments in initialization.py"
lishunyang12 Apr 15, 2026
f048711
Merge branch 'main' into config-refactor-2a
lishunyang12 Apr 15, 2026
20ab452
Fix OmniServerStageCli to recognize new deploy YAML 'stages' key
lishunyang12 Apr 15, 2026
9ede944
Fix advanced_model fixture to strip load_format from new-schema yamls
lishunyang12 Apr 15, 2026
59804d4
Capture stage subprocess logs for debugging failing startups
lishunyang12 Apr 15, 2026
5cfacb7
Apply ruff format to stage subprocess error raise
lishunyang12 Apr 15, 2026
5e1feec
docs: clarify runtime.devices are logical indices
lishunyang12 Apr 17, 2026
56b07e6
test(dfx): clarify --deploy-config and --stage-overrides compose
lishunyang12 Apr 17, 2026
89ed8bb
test(dfx): clarify --deploy-config and --stage-overrides compose in s…
lishunyang12 Apr 17, 2026
077d08d
test(qwen3_tts): drop dead get_stage_config wrapper; call get_deploy_…
lishunyang12 Apr 17, 2026
648fb0f
[qwen3_tts] remove redundant no_async_chunk pipeline alias (async_chu…
lishunyang12 Apr 17, 2026
386a903
cli: flag --stage-configs-path as deprecated in its help string
lishunyang12 Apr 17, 2026
fd2b847
entrypoints: make detect_explicit_cli_keys parser-aware to resolve re…
lishunyang12 Apr 17, 2026
d450e3e
entrypoints: raise on --stage-configs-path + --deploy-config both set
lishunyang12 Apr 17, 2026
93342ff
config: map StageExecutionType to scheduler class (not dotted-path st…
lishunyang12 Apr 17, 2026
0f6063d
config: raise on invalid stage_id / unmapped execution_type in get_sc…
lishunyang12 Apr 17, 2026
dc51960
config: warn on silent clobber in _deep_merge_stage when types mismatch
lishunyang12 Apr 17, 2026
2077d35
config: raise if async_chunk=True but no stage declares an async handler
lishunyang12 Apr 17, 2026
a43e29e
Merge branch 'main' into config-refactor-2a
lishunyang12 Apr 17, 2026
aed3671
cli: stash parser via type(self) so docs hook doesn't hit NameError
lishunyang12 Apr 17, 2026
acba8f4
config: accept custom_process_next_stage_input_func as valid async_ch…
lishunyang12 Apr 17, 2026
4e11fdd
Config CI Fixes (#53)
alex-jw-brooks Apr 18, 2026
bbb0fb9
test(qwen3_omni): drop stage_args engine_args override; rely on pipel…
lishunyang12 Apr 18, 2026
24aa5e3
Merge branch 'main' into config-refactor-2a
lishunyang12 Apr 18, 2026
fb426c7
test(qwen3_omni_realtime): migrate from deleted legacy yaml to ci ove…
lishunyang12 Apr 18, 2026
963df1d
Merge branch 'main' into config-refactor-2a
lishunyang12 Apr 18, 2026
ca9e775
test(qwen3_omni): restore async_chunk variant in expansion test; log …
lishunyang12 Apr 18, 2026
11a16c1
test(dfx): add extra_cli_args passthrough; restore qwen3_omni async_c…
lishunyang12 Apr 18, 2026
22fb84d
deploy(qwen2_5_omni): default async_chunk to false; feature not yet s…
lishunyang12 Apr 18, 2026
b53967d
test(dfx): scope qwen3_omni json diffs to yaml-path-only; preserve ma…
lishunyang12 Apr 18, 2026
8c6b746
Merge branch 'main' into config-refactor-2a
hsliuustc0106 Apr 18, 2026
93106e2
test(conftest): tail OmniServerStageCli per-stage logs to stdout on t…
lishunyang12 Apr 18, 2026
bfaccfd
serve(headless): forward cli_explicit_keys so argparse defaults don't…
lishunyang12 Apr 18, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -263,3 +263,5 @@ tmp_test
vllm_omni/_version.py
# output files
*.wav
# CI overlay yamls materialized from tests/utils.py:_CI_OVERLAYS at test time
tests/.ci_generated/
24 changes: 14 additions & 10 deletions benchmarks/qwen3-tts/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -35,8 +35,8 @@ MODEL=Qwen/Qwen3-TTS-12Hz-1.7B-CustomVoice bash run_benchmark.sh --async-only
# Use a Voice Clone model
MODEL=Qwen/Qwen3-TTS-12Hz-1.7B-Base TASK_TYPE=Base bash run_benchmark.sh --async-only

# Use bs16 config for higher throughput
STAGE_CONFIG=vllm_omni/configs/qwen3_tts_bs16.yaml bash run_benchmark.sh --async-only
# Use batch size 16 for higher throughput
BATCH_SIZE=16 bash run_benchmark.sh --async-only
Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

is this BATCH_SIZE actually about concurrency? @linyueqian


# Custom GPU, prompt count, concurrency levels
GPU_DEVICE=1 NUM_PROMPTS=20 CONCURRENCY="1 4" bash run_benchmark.sh
Expand All @@ -50,7 +50,8 @@ GPU_DEVICE=1 NUM_PROMPTS=20 CONCURRENCY="1 4" bash run_benchmark.sh
CUDA_VISIBLE_DEVICES=0 python -m vllm_omni.entrypoints.cli.main serve \
"Qwen/Qwen3-TTS-12Hz-0.6B-CustomVoice" \
--omni --host 127.0.0.1 --port 8000 \
--stage-configs-path benchmarks/qwen3-tts/vllm_omni/configs/qwen3_tts_bs1.yaml \
--deploy-config vllm_omni/deploy/qwen3_tts.yaml \
--stage-overrides '{"0":{"max_num_seqs":1,"gpu_memory_utilization":0.3,"max_num_batched_tokens":512},"1":{"max_num_seqs":1,"gpu_memory_utilization":0.3,"max_num_batched_tokens":8192}}' \
--trust-remote-code
```

Expand Down Expand Up @@ -84,16 +85,19 @@ python benchmarks/qwen3-tts/plot_results.py \
--output results/comparison.png
```

## Stage Configs
## Batch-size presets

| Config | max_num_seqs | Description |
|--------|:------------:|-------------|
| `vllm_omni/configs/qwen3_tts_bs1.yaml` | 1 | Single-request processing (lowest latency) |
| `vllm_omni/configs/qwen3_tts_bs16.yaml` | 16 | High-throughput concurrent processing |
The bench script loads the bundled production deploy (`vllm_omni/deploy/qwen3_tts.yaml`) and layers per-stage budgets on top via `--stage-overrides`, driven by the `BATCH_SIZE` env var. Each batch size picks compatible per-stage `max_num_seqs`, `max_num_batched_tokens`, and `gpu_memory_utilization` defaults:

All configs use a 2-stage pipeline (Talker -> Code2Wav) with `async_chunk` streaming enabled. The `SharedMemoryConnector` streams codec frames (25-frame chunks with 25-frame context overlap) between stages.
| `BATCH_SIZE` | Description |
Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

same here

|:--:|-------------|
| `1` (default) | Single-request processing (lowest latency) |
| `4` | Moderate-throughput concurrent processing |
| `16` | High-throughput concurrent processing |

The model is specified via the CLI `--model` flag (or `MODEL` env var), so the same configs work for both the 0.6B and 1.7B model variants.
The 2-stage pipeline (Talker -> Code2Wav) runs with `async_chunk` streaming enabled via the prod deploy; the `SharedMemoryConnector` streams codec frames (25-frame chunks with 25-frame context overlap) between stages.

The model is specified via the CLI `--model` flag (or `MODEL` env var), so the same bench script works for both the 0.6B and 1.7B model variants.

## Metrics

Expand Down
89 changes: 47 additions & 42 deletions benchmarks/qwen3-tts/run_benchmark.sh
Original file line number Diff line number Diff line change
Expand Up @@ -26,18 +26,18 @@
# # Use Voice Clone model
# MODEL=Qwen/Qwen3-TTS-12Hz-1.7B-Base TASK_TYPE=Base bash run_benchmark.sh --async-only
#
# # Use batch_size=4 config:
# STAGE_CONFIG=vllm_omni/configs/qwen3_tts_bs4.yaml bash run_benchmark.sh --async-only
# # Use batch_size=4:
# BATCH_SIZE=4 bash run_benchmark.sh --async-only
#
# Environment variables:
# GPU_DEVICE - GPU index to use (default: 0)
# NUM_PROMPTS - Number of prompts per concurrency level (default: 50)
# CONCURRENCY - Space-separated concurrency levels (default: "1 4 10")
# MODEL - Model name (default: Qwen/Qwen3-TTS-12Hz-0.6B-CustomVoice)
# PORT - Server port (default: 8000)
# GPU_MEM_TALKER - gpu_memory_utilization for talker stage (default: 0.3)
# GPU_MEM_CODE2WAV - gpu_memory_utilization for code2wav stage (default: 0.2)
# STAGE_CONFIG - Path to stage config YAML (default: configs/qwen3_tts_bs1.yaml)
# BATCH_SIZE - Per-stage ``max_num_seqs`` for both talker and code2wav (default: 1)
# GPU_MEM_TALKER - gpu_memory_utilization for talker stage (default: 0.3 at bs=1, else 0.2)
# GPU_MEM_CODE2WAV - gpu_memory_utilization for code2wav stage (default: 0.3 at bs=1, else 0.2)
# TASK_TYPE - Task type: CustomVoice, VoiceDesign, Base (default: CustomVoice)

set -euo pipefail
Expand All @@ -51,14 +51,36 @@ NUM_PROMPTS="${NUM_PROMPTS:-50}"
CONCURRENCY="${CONCURRENCY:-1 4 10}"
MODEL="${MODEL:-Qwen/Qwen3-TTS-12Hz-0.6B-CustomVoice}"
PORT="${PORT:-8000}"
GPU_MEM_TALKER="${GPU_MEM_TALKER:-0.3}"
GPU_MEM_CODE2WAV="${GPU_MEM_CODE2WAV:-0.2}"
BATCH_SIZE="${BATCH_SIZE:-1}"
DEFAULT_MEM=$([ "${BATCH_SIZE}" = "1" ] && echo "0.3" || echo "0.2")
GPU_MEM_TALKER="${GPU_MEM_TALKER:-${DEFAULT_MEM}}"
GPU_MEM_CODE2WAV="${GPU_MEM_CODE2WAV:-${DEFAULT_MEM}}"
NUM_WARMUPS="${NUM_WARMUPS:-3}"
STAGE_CONFIG="${STAGE_CONFIG:-vllm_omni/configs/qwen3_tts_bs1.yaml}"
DEPLOY_CONFIG="vllm_omni/deploy/qwen3_tts.yaml"
RESULT_DIR="${SCRIPT_DIR}/results"
TIMESTAMP="$(date +%Y%m%d_%H%M%S)"
TASK_TYPE="${TASK_TYPE:-CustomVoice}"

# Build --stage-overrides JSON from BATCH_SIZE + GPU_MEM_*.
# Stage "0" is the talker, stage "1" is code2wav; both get max_num_seqs=BATCH_SIZE.
# The env vars are passed explicitly on the python invocation so the snippet does
# not depend on them being exported; the heredoc delimiter is quoted ('PYEOF') so
# the shell performs no expansion inside the Python source.
STAGE_OVERRIDES=$(
BATCH_SIZE="${BATCH_SIZE}" \
GPU_MEM_TALKER="${GPU_MEM_TALKER}" \
GPU_MEM_CODE2WAV="${GPU_MEM_CODE2WAV}" \
python - <<'PYEOF'
import json, os
bs = int(os.environ["BATCH_SIZE"])
mem_t = float(os.environ["GPU_MEM_TALKER"])
mem_c = float(os.environ["GPU_MEM_CODE2WAV"])
# Prefill budget grows with batch size on both stages.
talker_batched = 512 if bs <= 4 else 4096
code2wav_batched = 8192 if bs <= 4 else 32768
print(json.dumps({
"0": {"max_num_seqs": bs, "gpu_memory_utilization": mem_t, "max_num_batched_tokens": talker_batched},
"1": {"max_num_seqs": bs, "gpu_memory_utilization": mem_c, "max_num_batched_tokens": code2wav_batched},
}))
PYEOF
)

# Parse args
RUN_ASYNC=true
RUN_HF=true
Expand All @@ -75,41 +97,27 @@ mkdir -p "${RESULT_DIR}"
echo "============================================================"
echo " Qwen3-TTS Benchmark"
echo "============================================================"
echo " GPU: ${GPU_DEVICE}"
echo " Model: ${MODEL}"
echo " Prompts: ${NUM_PROMPTS}"
echo " Concurrency: ${CONCURRENCY}"
echo " Port: ${PORT}"
echo " Stage config: ${STAGE_CONFIG}"
echo " Results: ${RESULT_DIR}"
echo " Task type: ${TASK_TYPE}"
echo " GPU: ${GPU_DEVICE}"
echo " Model: ${MODEL}"
echo " Prompts: ${NUM_PROMPTS}"
echo " Concurrency: ${CONCURRENCY}"
echo " Port: ${PORT}"
echo " Deploy config: ${DEPLOY_CONFIG}"
echo " Batch size: ${BATCH_SIZE}"
echo " GPU mem T/C: ${GPU_MEM_TALKER} / ${GPU_MEM_CODE2WAV}"
echo " Results: ${RESULT_DIR}"
echo " Task type: ${TASK_TYPE}"
echo "============================================================"

# Prepare stage config with correct GPU device and memory settings
# Arguments: $1 - path to the stage-config YAML template
#            $2 - short name used to label the generated file
# Outputs:   prints the path of the patched YAML written under RESULT_DIR
# NOTE(review): the sed patterns match the template's literal default values
# (devices: "0", gpu_memory_utilization: 0.3 / 0.2); if the template defaults
# change, these substitutions silently stop applying — TODO confirm.
prepare_config() {
local config_template="$1"
local config_name="$2"
local output_path="${RESULT_DIR}/${config_name}_stage_config.yaml"

# Use sed to patch GPU device and memory utilization
sed \
-e "s/devices: \"0\"/devices: \"${GPU_DEVICE}\"/g" \
-e "s/gpu_memory_utilization: 0.3/gpu_memory_utilization: ${GPU_MEM_TALKER}/g" \
-e "s/gpu_memory_utilization: 0.2/gpu_memory_utilization: ${GPU_MEM_CODE2WAV}/g" \
"${config_template}" > "${output_path}"

echo "${output_path}"
}

# Start server and wait for it to be ready
start_server() {
local stage_config="$1"
local config_name="$2"
local config_name="$1"
local log_file="${RESULT_DIR}/server_${config_name}_${TIMESTAMP}.log"

echo ""
echo "Starting server with config: ${config_name}"
echo " Stage config: ${stage_config}"
echo " Deploy config: ${DEPLOY_CONFIG}"
echo " Stage overrides: ${STAGE_OVERRIDES}"
echo " Log file: ${log_file}"

VLLM_WORKER_MULTIPROC_METHOD=spawn \
Expand All @@ -118,7 +126,8 @@ start_server() {
--omni \
--host 127.0.0.1 \
--port "${PORT}" \
--stage-configs-path "${stage_config}" \
--deploy-config "${DEPLOY_CONFIG}" \
--stage-overrides "${STAGE_OVERRIDES}" \
--stage-init-timeout 120 \
--trust-remote-code \
--disable-log-stats \
Expand Down Expand Up @@ -175,17 +184,13 @@ trap 'stop_server' EXIT
# Run benchmark for a given config
run_bench() {
local config_name="$1"
local config_template="$2"

echo ""
echo "============================================================"
echo " Benchmarking: ${config_name}"
echo "============================================================"

local stage_config
stage_config=$(prepare_config "${config_template}" "${config_name}")

start_server "${stage_config}" "${config_name}"
start_server "${config_name}"

# Convert concurrency string to args
local conc_args=""
Expand All @@ -212,7 +217,7 @@ run_bench() {

# Run vllm-omni benchmark
if [ "${RUN_ASYNC}" = true ]; then
run_bench "async_chunk" "${SCRIPT_DIR}/${STAGE_CONFIG}"
run_bench "async_chunk"
fi

# Run HuggingFace baseline benchmark
Expand Down
88 changes: 0 additions & 88 deletions benchmarks/qwen3-tts/vllm_omni/configs/qwen3_tts_bs1.yaml

This file was deleted.

89 changes: 0 additions & 89 deletions benchmarks/qwen3-tts/vllm_omni/configs/qwen3_tts_bs16.yaml

This file was deleted.

Loading
Loading