From 02174aadcae36bc6bd192925030ba4bea1b2fb72 Mon Sep 17 00:00:00 2001 From: Yueqian Lin Date: Wed, 29 Apr 2026 01:21:14 -0400 Subject: [PATCH] feat(bench/dfx): add high-load stress phase for Qwen3-TTS Daily TTS perf CI currently caps at max_concurrency=8 in the throughput regime, so high-load TTFA tail regressions (e.g. the Code2Wav cross-request batching gap discussed in #3163 / shown by #3221) are invisible to nightly. This adds a stress phase mirroring the open-loop pattern already used by test_qwen_omni.json: 100 requests at request_rate=2.0 for both default_voice and voice_design. Baselines are intentionally loose (median TTFA 3.0-3.5 s, median RTF 0.25-0.30, audio_throughput floor 4.0 audio-s/wall-s) so the entry alarms only on real regressions and can be tightened in a follow-up once we have a few nightly runs. Signed-off-by: Yueqian Lin --- tests/dfx/perf/tests/test_tts.json | 36 ++++++++++++++++++++++++++++++ 1 file changed, 36 insertions(+) diff --git a/tests/dfx/perf/tests/test_tts.json b/tests/dfx/perf/tests/test_tts.json index 06c9c4d2384..fbd09d21966 100644 --- a/tests/dfx/perf/tests/test_tts.json +++ b/tests/dfx/perf/tests/test_tts.json @@ -115,6 +115,24 @@ "mean_audio_rtf": [0.35] } }, + { + "task": "default_voice", + "eval_phase": "stress", + "dataset_name": "seed-tts-text", + "backend": "openai-audio-speech", + "endpoint": "/v1/audio/speech", + "dataset_path": "benchmarks/build_dataset/seed_tts_smoke", + "num_prompts": [100], + "request_rate": [2.0], + "seed_tts_locale": "en", + "extra_body": {"voice": "Vivian", "language": "English", "task_type": "CustomVoice"}, + "percentile-metrics": "ttft,e2el,audio_rtf,audio_ttfp,audio_duration", + "baseline": { + "median_audio_ttfp_ms": [3000], + "median_audio_rtf": [0.25], + "audio_throughput": [4.0] + } + }, { "task": "voice_design", "eval_phase": "latency", @@ -149,6 +167,24 @@ "median_audio_rtf": [0.35], "audio_throughput": [25.0] } + }, + { + "task": "voice_design", + "eval_phase": "stress", + "dataset_name": "seed-tts-design", + "backend": "openai-audio-speech", + "endpoint": "/v1/audio/speech", + "dataset_path": "benchmarks/build_dataset/seed_tts_design", + "num_prompts": [100], + "request_rate": [2.0], + "seed_tts_locale": "en", + "extra_body": {"task_type": "VoiceDesign", "language": "English"}, + "percentile-metrics": "ttft,e2el,audio_rtf,audio_ttfp,audio_duration", + "baseline": { + "median_audio_ttfp_ms": [3500], + "median_audio_rtf": [0.30], + "audio_throughput": [4.0] + } } ] }