Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -223,6 +223,7 @@ datasets/
*.csv
*.json
!apps/ComfyUI-vLLM-Omni/example_workflows/*.json
!vllm_omni/model_executor/models/cosyvoice3/hf_config/config.json
*.jsonl
*.parquet

Expand Down
2 changes: 1 addition & 1 deletion docs/models/supported_models.md
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@ th {
|`LongCatImageEditPipeline` | LongCat-Image-Edit | `meituan-longcat/LongCat-Image-Edit` |
|`StableDiffusion3Pipeline` | Stable-Diffusion-3 | `stabilityai/stable-diffusion-3.5-medium` |
|`CosyVoice3Model` | CosyVoice3 | `FunAudioLLM/Fun-CosyVoice3-0.5B-2512` |
|`FunAudioChatForConditionalGeneration` | Fun-Audio-Chat-8B | `FunAudioLLM/Fun-Audio-Chat-8B` |
|`MammothModa2ForConditionalGeneration` | MammothModa2-Preview | `bytedance-research/MammothModa2-Preview` |
|`Flux2KleinPipeline` | FLUX.2-klein | `black-forest-labs/FLUX.2-klein-4B`, `black-forest-labs/FLUX.2-klein-9B` |
|`FluxPipeline` | FLUX.1-dev | `black-forest-labs/FLUX.1-dev` |
Expand All @@ -54,7 +55,6 @@ th {
|`DreamIDOmniPipeline`| DreamID-Omni | `XuGuo699/DreamID-Omni` |
|`VoxtralTTSForConditionalGeneration` | Voxtral TTS | `mistralai/tts-model` |


## List of Supported Models for NPU

<style>
Expand Down
1 change: 1 addition & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -87,6 +87,7 @@ include = ["vllm_omni*"]

[tool.setuptools.package-data]
"vllm_omni" = ["_version.py", "py.typed"]
"vllm_omni.model_executor.models.cosyvoice3" = ["hf_config/*.json"]
"vllm_omni.model_executor.stage_configs" = ["*.yaml"]

[tool.setuptools_scm]
Expand Down
29 changes: 29 additions & 0 deletions tests/engine/test_stage_init_utils.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
from types import SimpleNamespace

from vllm_omni.engine.stage_init_utils import build_engine_args_dict


def test_build_engine_args_dict_preserves_stage_model_override():
    """The model set in the stage's own engine_args is kept over the ``model`` argument."""
    stage_engine_args = SimpleNamespace(model="stage-model", worker_type="ar")
    stage_cfg = SimpleNamespace(stage_id=1, stage_type="llm", engine_args=stage_engine_args)

    built = build_engine_args_dict(stage_cfg, model="cli-model")

    assert built["model"] == "stage-model"
    assert built["stage_id"] == 1


def test_build_engine_args_dict_falls_back_to_cli_model():
    """When the stage's engine_args carries no model, the ``model`` argument is used."""
    stage_engine_args = SimpleNamespace(worker_type="ar")
    stage_cfg = SimpleNamespace(stage_id=0, stage_type="llm", engine_args=stage_engine_args)

    built = build_engine_args_dict(stage_cfg, model="cli-model")

    assert built["model"] == "cli-model"
    assert built["stage_id"] == 0
52 changes: 52 additions & 0 deletions tests/entrypoints/test_funaudiochat_contrib.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@
# SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project

from pathlib import Path
from types import SimpleNamespace

import pytest
import yaml

from vllm_omni.engine.arg_utils import _resolve_bundled_hf_config_path
from vllm_omni.entrypoints import utils as entrypoint_utils

pytestmark = [pytest.mark.core_model, pytest.mark.cpu]


def test_resolve_bundled_hf_config_path_uses_cosyvoice3_bundle_by_default():
    """With no explicit path, the resolver returns the cosyvoice3 ``hf_config`` directory.

    The returned directory must also contain a ``config.json`` file.
    """
    resolved = _resolve_bundled_hf_config_path("FunAudioChatCosyVoice3Code2Wav", None)

    assert resolved is not None
    bundle_dir = Path(resolved)
    # Compare in POSIX form so the suffix check does not depend on the OS path separator.
    assert bundle_dir.as_posix().endswith("vllm_omni/model_executor/models/cosyvoice3/hf_config")
    assert (bundle_dir / "config.json").is_file()


def test_resolve_bundled_hf_config_path_preserves_explicit_override():
    """An explicitly supplied config path is returned unchanged."""
    explicit_path = "/tmp/custom-hf-config"

    resolved = _resolve_bundled_hf_config_path("FunAudioChatCosyVoice3Code2Wav", explicit_path)

    assert resolved == "/tmp/custom-hf-config"


def test_resolve_model_config_path_detects_funaudiochat_default_yaml(monkeypatch: pytest.MonkeyPatch):
    """A config whose model_type is "funaudiochat" resolves to ``funaudiochat.yaml``.

    ``get_config`` on the entrypoint utils module is replaced with a stub that
    returns an object carrying only ``model_type``.
    """
    monkeypatch.setattr(
        entrypoint_utils,
        "get_config",
        lambda model, trust_remote_code=True: SimpleNamespace(model_type="funaudiochat"),
    )

    resolved = entrypoint_utils.resolve_model_config_path("dummy-funaudiochat-model")

    assert resolved is not None
    # Compare in POSIX form so the suffix check does not depend on the OS path separator.
    assert Path(resolved).as_posix().endswith("vllm_omni/model_executor/stage_configs/funaudiochat.yaml")


def test_funaudiochat_default_stage_config_limits_audio_profile_and_keeps_audio_towers():
    """Check selected stage-0 engine args in the shipped ``funaudiochat.yaml``.

    Asserts that ``language_model_only`` is absent, that the audio config
    override sets ``max_source_positions`` to 100, and that the per-prompt
    audio limit is 1.
    """
    repo_root = Path(__file__).resolve().parents[2]
    config_path = repo_root / "vllm_omni" / "model_executor" / "stage_configs" / "funaudiochat.yaml"
    # Decode explicitly as UTF-8; the default would follow the platform locale.
    config = yaml.safe_load(config_path.read_text(encoding="utf-8"))
    stage0_engine_args = config["stage_args"][0]["engine_args"]

    assert "language_model_only" not in stage0_engine_args
    assert stage0_engine_args["hf_overrides"]["audio_config"]["max_source_positions"] == 100
    assert stage0_engine_args["limit_mm_per_prompt"]["audio"] == 1
96 changes: 96 additions & 0 deletions tests/model_executor/models/test_funaudiochat_code2wav.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,96 @@
from __future__ import annotations

from types import SimpleNamespace

import torch

from vllm_omni.model_executor.models.funaudiochat.funaudiochat_code2wav import (
FunAudioChatCosyVoice3Code2Wav,
)


def test_split_tokens_like_official_keeps_short_inputs_as_single_segment():
    """A 100-token input comes back as exactly one segment equal to the input."""
    codes = torch.arange(100, dtype=torch.long)

    pieces = FunAudioChatCosyVoice3Code2Wav._split_tokens_like_official(codes)

    assert len(pieces) == 1
    assert torch.equal(pieces[0], codes)


def test_split_tokens_like_official_rebalances_tiny_tail_segment():
    """A 760-token input is split into two 380-token segments that concatenate back to the input."""
    codes = torch.arange(760, dtype=torch.long)

    pieces = FunAudioChatCosyVoice3Code2Wav._split_tokens_like_official(codes)

    piece_lengths = [piece.numel() for piece in pieces]
    assert piece_lengths == [380, 380]
    rejoined = torch.cat(pieces, dim=0)
    assert torch.equal(rejoined, codes)


def _build_code2wav_stub() -> FunAudioChatCosyVoice3Code2Wav:
    """Return a FunAudioChatCosyVoice3Code2Wav created without calling ``__init__``.

    Only the attributes assigned here are set: a CPU device config, the
    maximum codec token id, the dummy-profile token length, and the
    "already logged" flag.
    """
    stub = object.__new__(FunAudioChatCosyVoice3Code2Wav)
    cpu_device_config = SimpleNamespace(device=torch.device("cpu"))
    stub_attributes = {
        "vllm_config": SimpleNamespace(device_config=cpu_device_config),
        "_max_codec_token_id": 6560,
        "_dummy_profile_token_len": 32,
        "_logged_dummy_profile_cap": False,
    }
    # Dict order is insertion order, so attributes are assigned in the order listed.
    for attr_name, attr_value in stub_attributes.items():
        setattr(stub, attr_name, attr_value)
    return stub


def test_build_decode_tokens_keeps_real_input_ids_without_sampling_metadata():
    """Non-zero input ids with no sampling metadata yield one batch holding those ids."""
    stub = _build_code2wav_stub()
    real_ids = torch.tensor([12, 34, 56], dtype=torch.long)

    batches, dummy_flag = stub._build_decode_tokens(real_ids, sampling_metadata=None)

    assert len(batches) == 1
    first_batch = batches[0]
    assert first_batch.tolist() == [[12, 34, 56]]
    assert dummy_flag is False


def test_build_decode_tokens_uses_prompt_token_ids_when_input_ids_are_empty():
    """With empty input ids, the tokens come from ``sampling_metadata.prompt_token_ids``."""
    stub = _build_code2wav_stub()
    metadata = SimpleNamespace(prompt_token_ids=[1, 2, 3, 4])
    no_input_ids = torch.empty((0,), dtype=torch.long)

    batches, dummy_flag = stub._build_decode_tokens(no_input_ids, metadata)

    assert len(batches) == 1
    first_batch = batches[0]
    assert first_batch.tolist() == [[1, 2, 3, 4]]
    assert dummy_flag is False


def test_build_decode_tokens_treats_all_zero_missing_metadata_as_dummy_profile():
    """64 all-zero ids with no metadata are flagged as a dummy profile and returned with shape (1, 32)."""
    stub = _build_code2wav_stub()
    zero_ids = torch.zeros((64,), dtype=torch.long)

    batches, dummy_flag = stub._build_decode_tokens(zero_ids, sampling_metadata=None)

    assert len(batches) == 1
    first_batch = batches[0]
    assert first_batch.shape == (1, 32)
    assert dummy_flag is True


def test_build_decode_tokens_no_longer_rejects_long_sequences_before_segmentation():
    """A 10235-token input is returned whole as a single (1, 10235) batch."""
    stub = _build_code2wav_stub()
    # Values are wrapped modulo 6000.
    long_ids = torch.arange(10235, dtype=torch.long) % 6000

    batches, dummy_flag = stub._build_decode_tokens(long_ids, sampling_metadata=None)

    assert len(batches) == 1
    first_batch = batches[0]
    assert first_batch.shape == (1, 10235)
    assert dummy_flag is False


def test_build_decode_tokens_preserves_batched_prompt_token_ids_per_request():
    """Nested prompt_token_ids produce one token batch per inner list, in order."""
    stub = _build_code2wav_stub()
    metadata = SimpleNamespace(prompt_token_ids=[[1, 2, 3], [4, 5]])
    no_input_ids = torch.empty((0,), dtype=torch.long)

    batches, dummy_flag = stub._build_decode_tokens(no_input_ids, metadata)

    batches_as_lists = [batch.tolist() for batch in batches]
    assert batches_as_lists == [[[1, 2, 3]], [[4, 5]]]
    assert dummy_flag is False
Loading
Loading