diff --git a/tests/e2e/offline_inference/test_stable_audio_expansion.py b/tests/e2e/offline_inference/test_stable_audio_expansion.py index a7968aef366..24ea787213a 100644 --- a/tests/e2e/offline_inference/test_stable_audio_expansion.py +++ b/tests/e2e/offline_inference/test_stable_audio_expansion.py @@ -77,8 +77,14 @@ def test_stable_audio_quantization_and_teacache() -> None: CI should provide ``HF_TOKEN`` if the checkpoint is gated. """ + # ``model_class_name`` must be passed explicitly: the default-stage-cfg + # factory in ``async_omni_engine.py`` reads it out of ``kwargs`` when + # deciding ``final_output_type`` (#2077), and at construction time the + # auto-resolution from ``model_index.json`` has not run yet. AudioX's + # offline test follows the same pattern. m = Omni( model="stabilityai/stable-audio-open-1.0", + model_class_name="StableAudioPipeline", quantization="fp8", cache_backend="tea_cache", cache_config={"rel_l1_thresh": 0.2}, diff --git a/vllm_omni/diffusion/models/stable_audio/pipeline_stable_audio.py b/vllm_omni/diffusion/models/stable_audio/pipeline_stable_audio.py index d6b4f9e93e0..7816e49217a 100644 --- a/vllm_omni/diffusion/models/stable_audio/pipeline_stable_audio.py +++ b/vllm_omni/diffusion/models/stable_audio/pipeline_stable_audio.py @@ -12,6 +12,7 @@ import os from collections.abc import Iterable +from typing import ClassVar import torch from diffusers import AutoencoderOobleck @@ -75,6 +76,13 @@ class StableAudioPipeline(nn.Module, SupportAudioOutput, DiffusionPipelineProfil prefix: Weight prefix for loading (default: "") """ + # Picked up by ``supports_audio_output`` in the diffusion engine so the + # default stage metadata reports ``final_output_type="audio"`` and the + # ``multimodal_output`` payload includes the sample rate (mirrors the + # contract introduced for AudioX in #2077). + support_audio_output: ClassVar[bool] = True + audio_sample_rate: ClassVar[int] = 44100 + def __init__( self, *,