Feat/Add HunyuanImage-3.0-Instruct AR part support (#2713)
Merged: hsliuustc0106 merged 6 commits into vllm-project:main from TaffyOfficial:feat/hunyuan-image3-model on Apr 16, 2026.
Commits (6), all by zuiho-kai:

- a27a690 [Model] HunyuanImage-3.0-Instruct: model registration, custom sampler…
- 2b7e4ab fix(hunyuan_image3): align I2T with official HF model output
- d9d5136 fix(hunyuan_image3): address PR review - remove dead code, use tokeni…
- e1d7bab fix(hunyuan_image3): simplify transition state to single-request, fix…
- e18e920 fix(hunyuan_image3): remove LLM-only fields from diffusion stage engi…
- fffba50 fix(hunyuan_image3): add batch assertion, patch sanity check, sampler…
prompt_utils (new file, 88 additions):

```python
# SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
"""
Prompt construction utilities for HunyuanImage-3.0-Instruct examples.

Wraps system_prompt.get_system_prompt() with task-aware presets so that
examples and tests don't need to manually concatenate system prompts,
<img>, <think>, and <recaption> tags.

Usage:
    from prompt_utils import build_prompt

    # IT2I (image editing, think+recaption mode)
    prompt = build_prompt("Make the petals neon pink", task="it2i_think")

    # I2T (image understanding)
    prompt = build_prompt("Describe the content of the picture.", task="i2t")
"""

from __future__ import annotations

from vllm_omni.diffusion.models.hunyuan_image3.system_prompt import (
    get_system_prompt,
)

# task -> (sys_type, bot_task, trigger_tag)
# trigger_tag: "<think>", "<recaption>", or None
_TASK_PRESETS: dict[str, tuple[str, str | None, str | None]] = {
    # Pure text generation (text -> text, no image)
    "t2t": ("en_unified", None, None),
    # Image understanding (image -> text)
    "i2t": ("en_unified", None, None),
    # Image editing (image+text -> image), think+recaption mode
    "it2i_think": ("en_unified", "think", "<think>"),
    # Image editing, recaption-only mode
    "it2i_recaption": ("en_unified", "recaption", "<recaption>"),
    # Text-to-image, think mode
    "t2i_think": ("en_unified", "think", "<think>"),
    # Text-to-image, recaption mode
    "t2i_recaption": ("en_unified", "recaption", "<recaption>"),
    # Text-to-image, vanilla (no CoT)
    "t2i_vanilla": ("en_vanilla", "image", None),
}


def build_prompt(
    user_prompt: str,
    task: str = "it2i_think",
    sys_type: str | None = None,
    custom_system_prompt: str | None = None,
) -> str:
    """Build a complete HunyuanImage-3.0 prompt with auto-selected system
    prompt and mode trigger tags.

    Args:
        user_prompt: The user's raw instruction or question.
        task: One of the preset task keys (see _TASK_PRESETS).
        sys_type: Override the preset's sys_type for get_system_prompt().
        custom_system_prompt: Custom system prompt text (used when
            sys_type="custom").

    Returns:
        Fully formatted prompt string ready for Omni.generate().
    """
    if task not in _TASK_PRESETS:
        raise ValueError(f"Unknown task {task!r}. Choose from: {sorted(_TASK_PRESETS)}")

    preset_sys_type, preset_bot_task, trigger_tag = _TASK_PRESETS[task]
    effective_sys_type = sys_type or preset_sys_type

    system_prompt = get_system_prompt(effective_sys_type, preset_bot_task, custom_system_prompt)
    sys_text = system_prompt.strip() if system_prompt else ""

    has_image_input = task.startswith("i2t") or task.startswith("it2i")

    parts = ["<|startoftext|>"]
    if sys_text:
        parts.append(sys_text)
    # Instruct conversation template: \n\nUser: ... \n\nAssistant:
    parts.append("\n\nUser: ")
    if has_image_input:
        parts.append("<img>")
    parts.append(user_prompt)
    parts.append("\n\nAssistant: ")
    if trigger_tag:
        parts.append(trigger_tag)

    return "".join(parts)
```
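The template assembly can be traced by hand. Below is a standalone sketch for `task="it2i_think"` with `get_system_prompt` stubbed out — `fake_get_system_prompt` and its returned text are placeholders for illustration, not the real system prompt from `vllm_omni.diffusion.models.hunyuan_image3.system_prompt`:

```python
# Standalone trace of build_prompt's assembly for task="it2i_think".
# fake_get_system_prompt is a hypothetical stand-in; the real system
# prompt text differs.
def fake_get_system_prompt(sys_type, bot_task):
    return "You are a helpful assistant."  # placeholder text

sys_text = fake_get_system_prompt("en_unified", "think").strip()
user_prompt = "Make the petals neon pink"

# it2i_* tasks carry image input, so <img> precedes the user text;
# the "think" preset appends the <think> trigger after "Assistant: ".
parts = ["<|startoftext|>", sys_text, "\n\nUser: ", "<img>",
         user_prompt, "\n\nAssistant: ", "<think>"]
prompt = "".join(parts)
print(prompt)
```

The trigger tag at the very end is what steers the model into chain-of-thought mode before it starts generating.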
tests/diffusion/models/hunyuan_image3/test_hunyuan_image3_sampler.py (new file, 190 additions):
```python
# SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
"""Unit tests for HunyuanImage3 AR sampler logic (stage transitions,
ratio restriction, comprehension blocking)."""

import pytest
import torch

pytestmark = [pytest.mark.core_model, pytest.mark.cpu]

# Fake token IDs for testing (avoid importing the real model).
END_OF_THINK = 100
RECAPTION = 101
END_OF_RECAPTION = 102
ANSWER = 103
BOI = 104
SIZE_TOKEN = 105
EOS = 106
RATIO_START = 200
RATIO_END = 210
RATIO_OTHER_START = 220
RATIO_OTHER_END = 223


class FakeSamplerModel:
    """Minimal stub that replicates the sampler-relevant attributes of
    HunyuanImage3ForConditionalGeneration without loading real weights."""

    def __init__(self, *, is_comprehension: bool = False):
        self._is_comprehension = is_comprehension
        self._eos_token_id = EOS
        self._end_of_think_id = END_OF_THINK
        self._recaption_id = RECAPTION
        self._end_of_recaption_id = END_OF_RECAPTION
        self._answer_id = ANSWER
        self._mrope_boi_token_id = BOI
        self._size_token_id = SIZE_TOKEN
        self._start_ratio_id = RATIO_START
        self._end_ratio_id = RATIO_END
        self._ratio_other_slices = [(RATIO_OTHER_START, RATIO_OTHER_END + 1)]
        self._all_ratio_ids = set(range(RATIO_START, RATIO_END + 1))
        self._all_ratio_ids.update(range(RATIO_OTHER_START, RATIO_OTHER_END + 1))

        self._stage_transitions: dict[int, list[int]] = {}
        if not is_comprehension:
            self._stage_transitions[END_OF_THINK] = [RECAPTION]
            self._stage_transitions[END_OF_RECAPTION] = [ANSWER, BOI, SIZE_TOKEN]

        self._blocked_token_ids: set[int] = set()
        if is_comprehension:
            self._blocked_token_ids.update([BOI, SIZE_TOKEN])
            self._blocked_token_ids.update(self._all_ratio_ids)

    # Bind the real methods from the model class.
    from vllm_omni.model_executor.models.hunyuan_image3.hunyuan_image3 import (
        HunyuanImage3ForConditionalGeneration as _Real,
    )

    _get_forced_token = _Real._get_forced_token
    _apply_ratio_restriction = _Real._apply_ratio_restriction


class TestGetForcedToken:
    """Tests for the stateless _get_forced_token method."""

    def setup_method(self):
        self.model = FakeSamplerModel(is_comprehension=False)

    def test_no_trigger_returns_none(self):
        assert self.model._get_forced_token([1, 2, 3]) is None

    def test_empty_history_returns_none(self):
        assert self.model._get_forced_token([]) is None

    def test_end_of_think_forces_recaption(self):
        assert self.model._get_forced_token([END_OF_THINK]) == RECAPTION

    def test_end_of_think_completed(self):
        assert self.model._get_forced_token([END_OF_THINK, RECAPTION]) is None

    def test_end_of_recaption_forces_answer(self):
        tokens = [END_OF_THINK, RECAPTION, END_OF_RECAPTION]
        assert self.model._get_forced_token(tokens) == ANSWER

    def test_end_of_recaption_forces_boi_after_answer(self):
        tokens = [END_OF_THINK, RECAPTION, END_OF_RECAPTION, ANSWER]
        assert self.model._get_forced_token(tokens) == BOI

    def test_end_of_recaption_forces_size_after_boi(self):
        tokens = [END_OF_THINK, RECAPTION, END_OF_RECAPTION, ANSWER, BOI]
        assert self.model._get_forced_token(tokens) == SIZE_TOKEN

    def test_full_sequence_complete(self):
        tokens = [END_OF_THINK, RECAPTION, END_OF_RECAPTION, ANSWER, BOI, SIZE_TOKEN]
        assert self.model._get_forced_token(tokens) is None

    def test_diverged_history_returns_none(self):
        tokens = [END_OF_RECAPTION, 999]  # 999 != ANSWER
        assert self.model._get_forced_token(tokens) is None

    def test_later_trigger_takes_precedence(self):
        tokens = [END_OF_THINK, RECAPTION, END_OF_RECAPTION]
        assert self.model._get_forced_token(tokens) == ANSWER

    def test_trigger_with_extra_tokens_before(self):
        tokens = [1, 2, 3, END_OF_THINK]
        assert self.model._get_forced_token(tokens) == RECAPTION


class TestComprehensionBlocking:
    """Tests for comprehension mode token blocking."""

    def test_blocked_tokens_masked(self):
        model = FakeSamplerModel(is_comprehension=True)
        vocab_size = 300
        logits = torch.zeros(1, vocab_size)
        logits[0, BOI] = 5.0
        logits[0, SIZE_TOKEN] = 3.0
        logits[0, RATIO_START] = 2.0
        min_score = torch.finfo(logits.dtype).min

        for tid in model._blocked_token_ids:
            if tid < vocab_size:
                logits[0, tid] = min_score

        assert logits[0, BOI].item() == min_score
        assert logits[0, SIZE_TOKEN].item() == min_score
        assert logits[0, RATIO_START].item() == min_score

    def test_non_blocked_tokens_preserved(self):
        model = FakeSamplerModel(is_comprehension=True)
        vocab_size = 300
        logits = torch.zeros(1, vocab_size)
        logits[0, 50] = 7.0
        min_score = torch.finfo(logits.dtype).min

        for tid in model._blocked_token_ids:
            if tid < vocab_size:
                logits[0, tid] = min_score

        assert logits[0, 50].item() == 7.0


class TestRatioRestriction:
    """Tests for _apply_ratio_restriction (greedy: only argmax ratio survives)."""

    def test_greedy_selects_single_ratio_token(self):
        model = FakeSamplerModel(is_comprehension=False)
        vocab_size = 300
        logits = torch.zeros(1, vocab_size)
        logits[0, RATIO_START + 3] = 10.0
        logits[0, RATIO_START + 1] = 5.0
        logits[0, 50] = 20.0  # non-ratio, should be masked
        min_score = torch.finfo(logits.dtype).min

        model._apply_ratio_restriction(logits, 0, min_score)

        assert logits[0, RATIO_START + 3].item() == 0
        assert logits[0, RATIO_START + 1].item() == min_score
        assert logits[0, 50].item() == min_score

    def test_extra_ratio_slices_considered(self):
        model = FakeSamplerModel(is_comprehension=False)
        vocab_size = 300
        logits = torch.zeros(1, vocab_size)
        logits[0, RATIO_OTHER_START] = 15.0
        logits[0, RATIO_START] = 5.0
        min_score = torch.finfo(logits.dtype).min

        model._apply_ratio_restriction(logits, 0, min_score)

        assert logits[0, RATIO_OTHER_START].item() == 0
        assert logits[0, RATIO_START].item() == min_score


class TestForceEosAfterRatio:
    """Tests that a ratio token as last_token forces EOS."""

    def test_ratio_token_forces_eos(self):
        model = FakeSamplerModel(is_comprehension=False)
        vocab_size = 300
        logits = torch.randn(1, vocab_size)
        min_score = torch.finfo(logits.dtype).min

        logits[0].fill_(min_score)
        logits[0, model._eos_token_id] = 0

        assert logits[0, EOS].item() == 0
        non_eos_max = logits[0, :EOS].max().item()
        assert non_eos_max == min_score
```
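The forcing and ratio-masking behavior these tests encode can be sketched standalone. The real methods are bound from `HunyuanImage3ForConditionalGeneration`; the functions below are hypothetical reimplementations, consistent with the assertions above but using plain Python lists instead of torch tensors:

```python
# Hypothetical reimplementations of the sampler helpers, written to
# match the behavior the tests assert; not the model's actual code.
END_OF_THINK, RECAPTION, END_OF_RECAPTION = 100, 101, 102
ANSWER, BOI, SIZE_TOKEN = 103, 104, 105

# trigger token -> chain of tokens forced to follow it
STAGE_TRANSITIONS = {
    END_OF_THINK: [RECAPTION],
    END_OF_RECAPTION: [ANSWER, BOI, SIZE_TOKEN],
}


def get_forced_token(tokens):
    """Scan back to the most recent trigger; if the tokens emitted since
    it match a prefix of its chain, return the next forced token."""
    for i in range(len(tokens) - 1, -1, -1):
        chain = STAGE_TRANSITIONS.get(tokens[i])
        if chain is None:
            continue
        emitted = tokens[i + 1:]
        if len(emitted) < len(chain) and emitted == chain[:len(emitted)]:
            return chain[len(emitted)]
        return None  # chain already complete, or history diverged
    return None


def apply_ratio_restriction(scores, ratio_slices, min_score):
    """Greedy ratio pick: zero the best-scoring ratio token across all
    slices and mask every other token to min_score."""
    best = max(
        (t for start, stop in ratio_slices for t in range(start, stop)),
        key=lambda t: scores[t],
    )
    masked = [min_score] * len(scores)
    masked[best] = 0.0
    return masked
```

Scanning backwards is what makes the later trigger take precedence, and setting the surviving ratio token's score to 0 while masking the rest guarantees the greedy sampler selects it.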
Review comment: hunyuan_image3_t2i.yaml doesn't exist in this PR. The deleted hunyuan_image3_moe.yaml is replaced by i2t/it2i/t2t configs, but there's no t2i config for pure text-to-image. This test will fail with FileNotFoundError.
Review comment: This file has been merged into the library along with #2712.