vllm-project · linyueqian · May 7, 2026 · May 7, 2026 · May 7, 2026 · May 7, 2026
@@ -77,8 +77,14 @@ def test_stable_audio_quantization_and_teacache() -> None:
 
     CI should provide ``HF_TOKEN`` if the checkpoint is gated.
     """
+    # ``model_class_name`` must be passed explicitly: the default stage-config
+    # factory in ``async_omni_engine.py`` reads it from ``kwargs`` when
+    # deciding ``final_output_type`` (#2077), and the auto-resolution from
+    # ``model_index.json`` has not yet run at construction time. AudioX's
+    # offline test follows the same pattern.
     m = Omni(
         model="stabilityai/stable-audio-open-1.0",
+        model_class_name="StableAudioPipeline",
         quantization="fp8",
         cache_backend="tea_cache",
         cache_config={"rel_l1_thresh": 0.2},

@@ -12,6 +12,7 @@
 
 import os
 from collections.abc import Iterable
+from typing import ClassVar
 
 import torch
 from diffusers import AutoencoderOobleck
@@ -75,6 +76,13 @@ class StableAudioPipeline(nn.Module, SupportAudioOutput, DiffusionPipelineProfil
         prefix: Weight prefix for loading (default: "")
     """
 
+    # Picked up by ``supports_audio_output`` in the diffusion engine so the
+    # default stage metadata reports ``final_output_type="audio"`` and the
+    # ``multimodal_output`` payload includes the sample rate (mirrors the
+    # contract introduced for AudioX in #2077).
+    support_audio_output: ClassVar[bool] = True
+    audio_sample_rate: ClassVar[int] = 44100
+
     def __init__(
         self,
         *,