Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 1 addition & 7 deletions tests/e2e/offline_inference/test_stable_audio_expansion.py
Original file line number Diff line number Diff line change
Expand Up @@ -56,13 +56,7 @@ def generate_stable_audio_short_clip(

assert outputs is not None
first_output = outputs[0]
# Outer OmniRequestOutput.final_output_type comes from get_stage_metadata.
# The nested request_output is the worker OmniRequestOutput
# (e.g. final_output_type="audio") and holds the multimodal payload.
# Follow-up: add StableAudioPipeline stage YAML, and pass model into
# _create_default_diffusion_stage_cfg so default diffusion metadata can set
# final_output_type to "audio" for future audio pipelines without YAML.
assert first_output.final_output_type == "image"
assert first_output.final_output_type == "audio"
assert hasattr(first_output, "request_output") and first_output.request_output

req_out = first_output.request_output
Expand Down
1 change: 1 addition & 0 deletions vllm_omni/entrypoints/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@

_DIFFUSERS_CLASS_TO_CONFIG: dict[str, str] = {
"GlmImagePipeline": "glm_image",
"StableAudioPipeline": "stable_audio",
}


Expand Down
8 changes: 8 additions & 0 deletions vllm_omni/model_executor/stage_configs/stable_audio.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
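# Stage config for StableAudioPipeline (the `_class_name` value in the model's model_index).
# This is a single-stage pipeline, so only the output-modality metadata needs to be declared here:
# **kwargs passed to Omni/AsyncOmni are merged into engine_args, and OmniDiffusionConfig
# supplies the remaining defaults (e.g. max_num_seqs).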

# One diffusion stage (id 0). It is marked as the final output stage and
# declares its output type as audio.
stage_args:
  - stage_id: 0
    stage_type: diffusion
    final_output: true
    final_output_type: audio