diff --git a/docs/models/supported_models.md b/docs/models/supported_models.md index b81ff5b992c..23b2cc715c9 100644 --- a/docs/models/supported_models.md +++ b/docs/models/supported_models.md @@ -20,6 +20,7 @@ th { | `Qwen3OmniMoeForConditionalGeneration` | Qwen3-Omni | `Qwen/Qwen3-Omni-30B-A3B-Instruct` | ✅︎ | ✅︎ | ✅︎ | ✅︎ | | `Qwen2_5OmniForConditionalGeneration` | Qwen2.5-Omni | `Qwen/Qwen2.5-Omni-7B`, `Qwen/Qwen2.5-Omni-3B` | ✅︎ | ✅︎ | ✅︎ | ✅︎ | | `BagelForConditionalGeneration` | BAGEL (DiT-only) | `ByteDance-Seed/BAGEL-7B-MoT` | ✅︎ | ✅︎ | | ✅︎ | +| `TunaExternalPipeline` | Tuna/Tuna-2 (recognized only; runtime integration not yet available) | [`facebookresearch/tuna-2`](https://github.com/facebookresearch/tuna-2) | | | | | | `InternVLAA1Pipeline` | InternVLA-A1 | `InternRobotics/InternVLA-A1-3B` | ✅︎ | ✅︎ | | | | `HunyuanImage3ForCausalMM` | HunyuanImage3.0 (DiT-only) | `tencent/HunyuanImage-3.0`, `tencent/HunyuanImage-3.0-Instruct` | ✅︎ | ✅︎ | ✅︎ | ✅︎ | | `QwenImagePipeline` | Qwen-Image | `Qwen/Qwen-Image` | ✅︎ | ✅︎ | ✅︎ | ✅︎ | @@ -68,3 +69,5 @@ th { |`DyninOmniForConditionalGeneration` | Dynin-Omni | `snu-aidas/Dynin-Omni` | ✅︎ | | | | ✅︎ indicates the model is supported on that backend. Empty cells mean not listed as supported on that backend. + +Tuna/Tuna-2 metadata is recognized by vLLM-Omni so users get an explicit integration-status error instead of an unknown-model failure. Runtime inference is not available yet because the upstream [`facebookresearch/tuna-2`](https://github.com/facebookresearch/tuna-2) project currently uses its own Hydra-based inference flow and `.pt` checkpoint format without a stable HuggingFace/diffusers loading contract. 
def test_is_diffusion_model_detects_tuna_config(monkeypatch):
    """A repo whose only metadata is a Tuna ``config.json`` is treated as diffusion.

    The model is made to look like a remote repo (``os.path.isdir`` -> False)
    with no ``model_index.json`` and a diffusers config loader that fails, so
    the only remaining signal is the ``architectures`` entry in ``config.json``.
    """

    def fake_get_hf_file_to_dict(filename, model_name):
        # Only config.json exists; every other file (including
        # model_index.json) is reported as missing.
        if filename == "config.json":
            return {"architectures": ["Tuna2PixelModel"]}
        return None

    def failing_load_diffusers_config(_model):
        raise ValueError("nope")

    monkeypatch.setattr("os.path.isdir", lambda _model: False)
    monkeypatch.setattr(hf_utils, "get_hf_file_to_dict", fake_get_hf_file_to_dict)
    monkeypatch.setattr(hf_utils, "load_diffusers_config", failing_load_diffusers_config)

    # is_diffusion_model is lru_cache'd: start from a clean cache so an earlier
    # test cannot answer for "dummy-tuna" ...
    hf_utils.is_diffusion_model.cache_clear()
    try:
        assert hf_utils.is_diffusion_model("dummy-tuna") is True
    finally:
        # ... and drop the entry computed against the fakes above so it cannot
        # leak into tests that run after monkeypatch has been undone.
        hf_utils.is_diffusion_model.cache_clear()
# Spellings of the Hugging Face ``model_type`` value that are all routed to the
# single "tuna" entry of the pipeline registry. Entries are stored in normalized
# form: ``create_from_model`` lower-cases the detected model_type and replaces
# "-" with "_" before testing membership, so every entry here must already be
# lower-case and underscore-separated.
_TUNA_MODEL_TYPES = {
    "tuna",
    "tuna2",
    "tuna_2",
    "tuna_2_pixel",
    "tuna2_pixel",
    "tuna_2r_pixel",
    "tuna2r_pixel",
}
a/vllm_omni/deploy/tuna.yaml b/vllm_omni/deploy/tuna.yaml new file mode 100644 index 00000000000..c315becb74a --- /dev/null +++ b/vllm_omni/deploy/tuna.yaml @@ -0,0 +1,17 @@ +# Tuna/Tuna-2 single-stage deploy placeholder. +# +# Upstream Tuna-2 currently publishes code and expects local `.pt` checkpoints +# driven through its Hydra CLI. This config lets vLLM-Omni recognize Tuna model +# metadata and route startup to a dedicated pipeline entrypoint with a clear +# integration message until a stable checkpoint/runtime contract is available. + +async_chunk: false + +stages: + - stage_id: 0 + max_num_seqs: 1 + devices: "0" + engine_extras: + model_class_name: TunaExternalPipeline + custom_pipeline_args: + variant: none_encoder diff --git a/vllm_omni/diffusion/data.py b/vllm_omni/diffusion/data.py index 676fcd9c79c..da683deae90 100644 --- a/vllm_omni/diffusion/data.py +++ b/vllm_omni/diffusion/data.py @@ -96,17 +96,20 @@ def _validate_parallel_config(self) -> Self: assert self.ulysses_degree > 0, "Ulysses degree must be > 0" assert self.ring_degree > 0, "Ring degree must be > 0" assert self.cfg_parallel_size > 0, "CFG parallel size must be > 0" - assert self.cfg_parallel_size in [1, 2, 3], ( - f"CFG parallel size must be 1, 2, or 3, but got {self.cfg_parallel_size}" - ) + assert self.cfg_parallel_size in [ + 1, + 2, + 3, + ], f"CFG parallel size must be 1, 2, or 3, but got {self.cfg_parallel_size}" assert self.vae_patch_parallel_size > 0, "VAE patch parallel size must be > 0" assert self.sequence_parallel_size == self.ulysses_degree * self.ring_degree, ( "Sequence parallel size must be equal to the product of ulysses degree and ring degree," f" but got {self.sequence_parallel_size} != {self.ulysses_degree} * {self.ring_degree}" ) - assert self.ulysses_mode in {"strict", "advanced_uaa"}, ( - f"ulysses_mode must be one of {{'strict','advanced_uaa'}}, but got {self.ulysses_mode!r}." 
- ) + assert self.ulysses_mode in { + "strict", + "advanced_uaa", + }, f"ulysses_mode must be one of {{'strict','advanced_uaa'}}, but got {self.ulysses_mode!r}." # Validate HSDP configuration if self.use_hsdp: @@ -525,12 +528,12 @@ class OmniDiffusionConfig: @property def is_moe(self) -> bool: num_experts = self.tf_model_config.get("num_experts", None) - if not isinstance(num_experts, (list, tuple, int)): + if not isinstance(num_experts, list | tuple | int): return False if isinstance(num_experts, int): return num_experts > 0 - if isinstance(num_experts, (list, tuple)): + if isinstance(num_experts, list | tuple): return any(isinstance(n, int) and n > 0 for n in num_experts) return False @@ -733,13 +736,27 @@ def enrich_config(self) -> None: model_type = cfg.get("model_type") architectures = cfg.get("architectures") or [] - if model_type == "bagel" or "BagelForConditionalGeneration" in architectures: - self.model_class_name = "BagelPipeline" - self.tf_model_config = TransformerConfig() - self.update_multimodal_support() - elif model_type == "nextstep": - if self.model_class_name is None: - self.model_class_name = "NextStep11Pipeline" + normalized_model_type = str(model_type or "").replace("-", "_").lower() + + if model_type == "bagel" or "BagelForConditionalGeneration" in architectures: + self.model_class_name = "BagelPipeline" + self.tf_model_config = TransformerConfig() + self.update_multimodal_support() + elif normalized_model_type in { + "tuna", + "tuna2", + "tuna_2", + "tuna_2_pixel", + "tuna2_pixel", + "tuna_2r_pixel", + "tuna2r_pixel", + } or any(str(arch).startswith("Tuna") for arch in architectures): + self.model_class_name = "TunaExternalPipeline" + self.tf_model_config = TransformerConfig() + self.update_multimodal_support() + elif model_type == "nextstep": + if self.model_class_name is None: + self.model_class_name = "NextStep11Pipeline" self.tf_model_config = TransformerConfig() self.update_multimodal_support() elif architectures and len(architectures) 
== 1: diff --git a/vllm_omni/diffusion/models/tuna/__init__.py b/vllm_omni/diffusion/models/tuna/__init__.py new file mode 100644 index 00000000000..6fc1086b049 --- /dev/null +++ b/vllm_omni/diffusion/models/tuna/__init__.py @@ -0,0 +1,3 @@ +from .pipeline_tuna import TunaExternalPipeline + +__all__ = ["TunaExternalPipeline"] diff --git a/vllm_omni/diffusion/models/tuna/pipeline_tuna.py b/vllm_omni/diffusion/models/tuna/pipeline_tuna.py new file mode 100644 index 00000000000..0636f84e9c6 --- /dev/null +++ b/vllm_omni/diffusion/models/tuna/pipeline_tuna.py @@ -0,0 +1,59 @@ +# SPDX-License-Identifier: Apache-2.0 +# SPDX-FileCopyrightText: Copyright contributors to the vLLM project +"""Tuna/Tuna-2 integration placeholder. + +The upstream Tuna-2 project currently publishes research/inference code but no +released model weights or HuggingFace-style runtime package contract. vLLM-Omni +can still recognize Tuna configs and route them here so users get an actionable +message instead of a generic "unknown model" failure. +""" + +from __future__ import annotations + +from torch import nn + +from vllm_omni.diffusion.data import DiffusionOutput, OmniDiffusionConfig +from vllm_omni.diffusion.request import OmniDiffusionRequest + +# TODO(#3303): Replace this placeholder once Tuna publishes a stable runtime +# loading contract that can be validated end to end in vLLM-Omni. +_TUNA_NOT_READY = ( + "Tuna/Tuna-2 is recognized by vLLM-Omni, but the runtime integration is " + "not available yet. Track vLLM-Omni issue #3303 and the upstream " + "facebookresearch/tuna-2 repository at " + "https://github.com/facebookresearch/tuna-2. The upstream project " + "currently uses its own Hydra-based inference entrypoint and checkpoint " + "format, and does not publish full model weights or a stable " + "HuggingFace/diffusers loading contract. 
class TunaExternalPipeline(nn.Module):
    """Registry placeholder for Tuna/Tuna-2 that always refuses to run.

    Construction, ``forward`` and ``load_weights`` all raise ``RuntimeError``
    carrying the ``_TUNA_NOT_READY`` message. The class exists so that a Tuna
    model resolves to a registered pipeline name and fails with that message
    rather than with an unknown-model error.
    """

    @staticmethod
    def _not_ready_error() -> RuntimeError:
        """Build the error every entry point of this placeholder raises."""
        return RuntimeError(_TUNA_NOT_READY)

    def __init__(self, *, od_config: OmniDiffusionConfig, prefix: str = ""):
        """Record ``od_config`` and then fail.

        Raises:
            RuntimeError: always.
        """
        super().__init__()
        self.od_config = od_config
        raise self._not_ready_error()

    def forward(self, req: OmniDiffusionRequest) -> DiffusionOutput:
        """Always raise ``RuntimeError``; no inference path exists."""
        raise self._not_ready_error()

    def load_weights(self, *args: object, **kwargs: object) -> set[str]:
        """Always raise ``RuntimeError``; no checkpoint loading exists."""
        raise self._not_ready_error()
# Normalized ``model_type`` spellings (lower-case, "-" replaced by "_") that
# identify a Tuna/Tuna-2 checkpoint. Immutable so it cannot be edited at runtime.
_TUNA_MODEL_TYPE_ALIASES = frozenset(
    {
        "tuna",
        "tuna2",
        "tuna_2",
        "tuna_2_pixel",
        "tuna2_pixel",
        "tuna_2r_pixel",
        "tuna2r_pixel",
    }
)


def _is_tuna_config(cfg: object) -> bool:
    """Return True when a parsed ``config.json`` payload describes a Tuna model.

    Args:
        cfg: The decoded ``config.json`` content, or ``None`` when the file
            could not be found. Anything that is not a ``dict`` is rejected.

    Returns:
        True if the normalized ``model_type`` is a known Tuna alias, or if any
        entry of ``architectures`` starts with ``"Tuna"``; False otherwise.
    """
    if not isinstance(cfg, dict):
        # Missing config.json (None) or an unexpected payload: not Tuna.
        return False

    model_type = str(cfg.get("model_type") or "").replace("-", "_").lower()
    if model_type in _TUNA_MODEL_TYPE_ALIASES:
        return True

    architectures = cfg.get("architectures") or []
    if isinstance(architectures, str):
        # Tolerate a bare string; iterating it would test single characters.
        architectures = [architectures]
    elif not isinstance(architectures, (list, tuple)):
        return False

    # Deliberately a prefix match so all Tuna* class names are recognized.
    return any(str(arch).startswith("Tuna") for arch in architectures)


def _looks_like_tuna(model_name: str) -> bool:
    """Best-effort detection for Tuna/Tuna-2 unified image models.

    Args:
        model_name: Model identifier handed to ``get_hf_file_to_dict`` to fetch
            ``config.json``.

    Returns:
        True when ``config.json`` identifies a Tuna model. False when it does
        not, when the file is missing, or when fetching it fails for any
        reason; this function never raises.
    """
    try:
        cfg = get_hf_file_to_dict("config.json", model_name)
    except Exception:
        # Detection is advisory: a failed lookup must read as "not Tuna"
        # instead of aborting model-type probing. Only the fetch is guarded so
        # bugs in the classification below are not silently swallowed.
        return False
    return _is_tuna_config(cfg)
+ "tuna": "tuna", + "tuna2": "tuna", + "tuna_2": "tuna", + "tuna_2_pixel": "tuna", + "tuna2_pixel": "tuna", + "tuna_2r_pixel": "tuna", + "tuna2r_pixel": "tuna", +} + def detect_explicit_cli_keys( argv: list[str], @@ -172,12 +184,7 @@ def _is_callable_value(value: Any) -> bool: return True return isinstance( value, - ( - types.FunctionType, - types.MethodType, - types.BuiltinFunctionType, - types.BuiltinMethodType, - ), + types.FunctionType | types.MethodType | types.BuiltinFunctionType | types.BuiltinMethodType, ) result = {} @@ -245,10 +252,10 @@ def _convert_dataclasses_to_dict(obj: Any) -> Any: if callable(obj): return None # Handle lists and tuples (recurse into items) - if isinstance(obj, (list, tuple)): + if isinstance(obj, list | tuple): return type(obj)(_convert_dataclasses_to_dict(item) for item in obj if not callable(item)) # Try to convert any dict-like object (has keys/values methods) to dict - if hasattr(obj, "keys") and hasattr(obj, "values") and not isinstance(obj, (str, bytes)): + if hasattr(obj, "keys") and hasattr(obj, "values") and not isinstance(obj, str | bytes): try: return _filter_dict_like_object(obj) except (TypeError, ValueError, AttributeError): @@ -340,6 +347,7 @@ def resolve_model_config_path(model: str) -> str: normalized_model_type = _DIFFUSERS_CLASS_TO_CONFIG[model_type] else: normalized_model_type = model_type.replace("-", "_") + normalized_model_type = _MODEL_TYPE_TO_CONFIG.get(normalized_model_type, normalized_model_type) model_type_str = f"{normalized_model_type}.yaml" complete_config_path = PROJECT_ROOT / default_config_path / model_type_str if os.path.exists(complete_config_path): @@ -712,7 +720,7 @@ def _filter_value(value: Any, annotation: Any) -> Any: if origin in (list, tuple, set): args = get_args(annotation) inner = args[0] if args else None - if isinstance(value, (list, tuple, set)): + if isinstance(value, list | tuple | set): return type(value)(_filter_value(v, inner) for v in value) return value diff --git 
# The only stage of the Tuna topology: a diffusion stage that declares no input
# sources and is marked as the final output, producing an image.
_TUNA_DIFFUSION_STAGE = StagePipelineConfig(
    stage_id=0,
    model_stage="diffusion",
    execution_type=StageExecutionType.DIFFUSION,
    input_sources=(),
    final_output=True,
    final_output_type="image",
    extras={
        "default_sampling_params": {
            "seed": 42,
            "num_inference_steps": 50,
            "guidance_scale": 3.0,
        },
    },
)

# Single-stage pipeline description registered under model_type "tuna".
# ``hf_architectures`` lists the architecture names associated with it.
TUNA_PIPELINE = PipelineConfig(
    model_type="tuna",
    hf_architectures=(
        "TunaPipeline",
        "Tuna2PixelPipeline",
        "Tuna2RPixelPipeline",
        "Tuna2PixelModel",
    ),
    stages=(_TUNA_DIFFUSION_STAGE,),
)
+ +stage_args: + - stage_id: 0 + stage_type: diffusion + runtime: + devices: "0" + engine_args: + model_stage: diffusion + model_class_name: TunaExternalPipeline + trust_remote_code: true + distributed_executor_backend: "mp" + custom_pipeline_args: + variant: none_encoder + final_output: true + final_output_type: image + default_sampling_params: + seed: 42 + num_inference_steps: 50 + guidance_scale: 3.0