88 changes: 88 additions & 0 deletions examples/offline_inference/hunyuan_image3/prompt_utils.py
@@ -0,0 +1,88 @@
# SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
"""
Prompt construction utilities for HunyuanImage-3.0-Instruct examples.

Wraps system_prompt.get_system_prompt() with task-aware presets so that
examples and tests don't need to manually concatenate system prompts,
<img>, <think>, and <recaption> tags.

Usage:
from prompt_utils import build_prompt

# IT2I (image editing, think+recaption mode)
prompt = build_prompt("Make the petals neon pink", task="it2i_think")

# I2T (image understanding)
prompt = build_prompt("Describe the content of the picture.", task="i2t")
"""

from __future__ import annotations

from vllm_omni.diffusion.models.hunyuan_image3.system_prompt import (
    get_system_prompt,
)

# task → (sys_type, bot_task, trigger_tag)
# trigger_tag: "<think>", "<recaption>", or None
_TASK_PRESETS: dict[str, tuple[str, str | None, str | None]] = {
    # Pure text generation (text → text, no image)
    "t2t": ("en_unified", None, None),
    # Image understanding (image → text)
    "i2t": ("en_unified", None, None),
    # Image editing (image+text → image), think+recaption mode
    "it2i_think": ("en_unified", "think", "<think>"),
    # Image editing, recaption-only mode
    "it2i_recaption": ("en_unified", "recaption", "<recaption>"),
    # Text-to-image, think mode
    "t2i_think": ("en_unified", "think", "<think>"),
    # Text-to-image, recaption mode
    "t2i_recaption": ("en_unified", "recaption", "<recaption>"),
    # Text-to-image, vanilla (no CoT)
    "t2i_vanilla": ("en_vanilla", "image", None),
}


def build_prompt(
    user_prompt: str,
    task: str = "it2i_think",
    sys_type: str | None = None,
    custom_system_prompt: str | None = None,
) -> str:
    """Build a complete HunyuanImage-3.0 prompt with auto-selected system
    prompt and mode trigger tags.

    Args:
        user_prompt: The user's raw instruction or question.
        task: One of the preset task keys (see _TASK_PRESETS).
        sys_type: Override the preset's sys_type for get_system_prompt().
        custom_system_prompt: Custom system prompt text (used when
            sys_type="custom").

    Returns:
        Fully formatted prompt string ready for Omni.generate().
    """
    if task not in _TASK_PRESETS:
        raise ValueError(f"Unknown task {task!r}. Choose from: {sorted(_TASK_PRESETS)}")

    preset_sys_type, preset_bot_task, trigger_tag = _TASK_PRESETS[task]
    effective_sys_type = sys_type or preset_sys_type

    system_prompt = get_system_prompt(effective_sys_type, preset_bot_task, custom_system_prompt)
    sys_text = system_prompt.strip() if system_prompt else ""

    has_image_input = task.startswith("i2t") or task.startswith("it2i")

    parts = ["<|startoftext|>"]
    if sys_text:
        parts.append(sys_text)
    # Instruct conversation template: \n\nUser: ... \n\nAssistant:
    parts.append("\n\nUser: ")
    if has_image_input:
        parts.append("<img>")
    parts.append(user_prompt)
    parts.append("\n\nAssistant: ")
    if trigger_tag:
        parts.append(trigger_tag)

    return "".join(parts)
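
For orientation, here is roughly what `build_prompt` assembles for the default `it2i_think` preset. This is a sketch only: `<SYS>` stands in for the real text returned by `get_system_prompt("en_unified", "think", None)`.

# Illustration; <SYS> abbreviates the actual system prompt text.
prompt = build_prompt("Make the petals neon pink", task="it2i_think")
# prompt == (
#     "<|startoftext|>" + "<SYS>"
#     + "\n\nUser: " + "<img>" + "Make the petals neon pink"
#     + "\n\nAssistant: " + "<think>"
# )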
190 changes: 190 additions & 0 deletions tests/diffusion/models/hunyuan_image3/test_hunyuan_image3_sampler.py
@@ -0,0 +1,190 @@
# SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
"""Unit tests for HunyuanImage3 AR sampler logic (stage transitions,
ratio restriction, comprehension blocking)."""

import pytest
import torch

pytestmark = [pytest.mark.core_model, pytest.mark.cpu]

# Fake token IDs for testing (avoid importing the real model).
END_OF_THINK = 100
RECAPTION = 101
END_OF_RECAPTION = 102
ANSWER = 103
BOI = 104
SIZE_TOKEN = 105
EOS = 106
RATIO_START = 200
RATIO_END = 210
RATIO_OTHER_START = 220
RATIO_OTHER_END = 223


class FakeSamplerModel:
    """Minimal stub that replicates the sampler-relevant attributes of
    HunyuanImage3ForConditionalGeneration without loading real weights."""

    def __init__(self, *, is_comprehension: bool = False):
        self._is_comprehension = is_comprehension
        self._eos_token_id = EOS
        self._end_of_think_id = END_OF_THINK
        self._recaption_id = RECAPTION
        self._end_of_recaption_id = END_OF_RECAPTION
        self._answer_id = ANSWER
        self._mrope_boi_token_id = BOI
        self._size_token_id = SIZE_TOKEN
        self._start_ratio_id = RATIO_START
        self._end_ratio_id = RATIO_END
        self._ratio_other_slices = [(RATIO_OTHER_START, RATIO_OTHER_END + 1)]
        self._all_ratio_ids = set(range(RATIO_START, RATIO_END + 1))
        self._all_ratio_ids.update(range(RATIO_OTHER_START, RATIO_OTHER_END + 1))

        self._stage_transitions: dict[int, list[int]] = {}
        if not is_comprehension:
            self._stage_transitions[END_OF_THINK] = [RECAPTION]
            self._stage_transitions[END_OF_RECAPTION] = [ANSWER, BOI, SIZE_TOKEN]

        self._blocked_token_ids: set[int] = set()
        if is_comprehension:
            self._blocked_token_ids.update([BOI, SIZE_TOKEN])
            self._blocked_token_ids.update(self._all_ratio_ids)

    # Bind the real methods from the model class.
    from vllm_omni.model_executor.models.hunyuan_image3.hunyuan_image3 import (
        HunyuanImage3ForConditionalGeneration as _Real,
    )

    _get_forced_token = _Real._get_forced_token
    _apply_ratio_restriction = _Real._apply_ratio_restriction


class TestGetForcedToken:
    """Tests for the stateless _get_forced_token method."""

    def setup_method(self):
        self.model = FakeSamplerModel(is_comprehension=False)

    def test_no_trigger_returns_none(self):
        assert self.model._get_forced_token([1, 2, 3]) is None

    def test_empty_history_returns_none(self):
        assert self.model._get_forced_token([]) is None

    def test_end_of_think_forces_recaption(self):
        assert self.model._get_forced_token([END_OF_THINK]) == RECAPTION

    def test_end_of_think_completed(self):
        assert self.model._get_forced_token([END_OF_THINK, RECAPTION]) is None

    def test_end_of_recaption_forces_answer(self):
        tokens = [END_OF_THINK, RECAPTION, END_OF_RECAPTION]
        assert self.model._get_forced_token(tokens) == ANSWER

    def test_end_of_recaption_forces_boi_after_answer(self):
        tokens = [END_OF_THINK, RECAPTION, END_OF_RECAPTION, ANSWER]
        assert self.model._get_forced_token(tokens) == BOI

    def test_end_of_recaption_forces_size_after_boi(self):
        tokens = [END_OF_THINK, RECAPTION, END_OF_RECAPTION, ANSWER, BOI]
        assert self.model._get_forced_token(tokens) == SIZE_TOKEN

    def test_full_sequence_complete(self):
        tokens = [END_OF_THINK, RECAPTION, END_OF_RECAPTION, ANSWER, BOI, SIZE_TOKEN]
        assert self.model._get_forced_token(tokens) is None

    def test_diverged_history_returns_none(self):
        tokens = [END_OF_RECAPTION, 999]  # 999 != ANSWER
        assert self.model._get_forced_token(tokens) is None

    def test_later_trigger_takes_precedence(self):
        tokens = [END_OF_THINK, RECAPTION, END_OF_RECAPTION]
        assert self.model._get_forced_token(tokens) == ANSWER

    def test_trigger_with_extra_tokens_before(self):
        tokens = [1, 2, 3, END_OF_THINK]
        assert self.model._get_forced_token(tokens) == RECAPTION

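Taken together, the cases above pin down the forcing rule: scan the history backwards for the most recent trigger, check that everything emitted since matches that trigger's chain, and force the next chain token until the chain completes. A reference sketch consistent with these tests (an assumption for illustration; the real `_get_forced_token` is the method bound from the model class above, not this code):

def _reference_get_forced_token(model, history):
    # Walk back from the newest token to the most recent trigger.
    for i in range(len(history) - 1, -1, -1):
        chain = model._stage_transitions.get(history[i])
        if chain is None:
            continue
        emitted = history[i + 1:]
        # If what followed the trigger diverges from the chain, stop forcing.
        if emitted != chain[: len(emitted)]:
            return None
        # Force the next chain token, or nothing once the chain is complete.
        return chain[len(emitted)] if len(emitted) < len(chain) else None
    return None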

class TestComprehensionBlocking:
    """Tests for comprehension mode token blocking."""

    def test_blocked_tokens_masked(self):
        model = FakeSamplerModel(is_comprehension=True)
        vocab_size = 300
        logits = torch.zeros(1, vocab_size)
        logits[0, BOI] = 5.0
        logits[0, SIZE_TOKEN] = 3.0
        logits[0, RATIO_START] = 2.0
        min_score = torch.finfo(logits.dtype).min

        for tid in model._blocked_token_ids:
            if tid < vocab_size:
                logits[0, tid] = min_score

        assert logits[0, BOI].item() == min_score
        assert logits[0, SIZE_TOKEN].item() == min_score
        assert logits[0, RATIO_START].item() == min_score

    def test_non_blocked_tokens_preserved(self):
        model = FakeSamplerModel(is_comprehension=True)
        vocab_size = 300
        logits = torch.zeros(1, vocab_size)
        logits[0, 50] = 7.0
        min_score = torch.finfo(logits.dtype).min

        for tid in model._blocked_token_ids:
            if tid < vocab_size:
                logits[0, tid] = min_score

        assert logits[0, 50].item() == 7.0

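Both tests apply the mask token by token; vectorized, the blocking step presumably reduces to a single indexed assignment (a sketch under that assumption, reusing the test-local names):

blocked = torch.tensor(
    [t for t in model._blocked_token_ids if t < vocab_size], dtype=torch.long
)
logits[0, blocked] = torch.finfo(logits.dtype).min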

class TestRatioRestriction:
    """Tests for _apply_ratio_restriction (greedy: only argmax ratio survives)."""

    def test_greedy_selects_single_ratio_token(self):
        model = FakeSamplerModel(is_comprehension=False)
        vocab_size = 300
        logits = torch.zeros(1, vocab_size)
        logits[0, RATIO_START + 3] = 10.0
        logits[0, RATIO_START + 1] = 5.0
        logits[0, 50] = 20.0  # non-ratio, should be masked
        min_score = torch.finfo(logits.dtype).min

        model._apply_ratio_restriction(logits, 0, min_score)

        assert logits[0, RATIO_START + 3].item() == 0
        assert logits[0, RATIO_START + 1].item() == min_score
        assert logits[0, 50].item() == min_score

    def test_extra_ratio_slices_considered(self):
        model = FakeSamplerModel(is_comprehension=False)
        vocab_size = 300
        logits = torch.zeros(1, vocab_size)
        logits[0, RATIO_OTHER_START] = 15.0
        logits[0, RATIO_START] = 5.0
        min_score = torch.finfo(logits.dtype).min

        model._apply_ratio_restriction(logits, 0, min_score)

        assert logits[0, RATIO_OTHER_START].item() == 0
        assert logits[0, RATIO_START].item() == min_score

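Both cases encode the same greedy rule: take the argmax over every ratio id (main slice plus the extra slices), pin its logit to 0, and push every other token to `min_score`. A sketch consistent with the assertions (an assumption; the real `_apply_ratio_restriction` is bound from the model class):

def _reference_apply_ratio_restriction(model, logits, row, min_score):
    ratio_ids = torch.tensor(sorted(model._all_ratio_ids), dtype=torch.long)
    # Greedy: the highest-scoring ratio token is the only survivor, at logit 0.
    best = ratio_ids[logits[row, ratio_ids].argmax()]
    logits[row].fill_(min_score)
    logits[row, best] = 0.0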

class TestForceEosAfterRatio:
    """Tests that a ratio token as last_token forces EOS."""

    def test_ratio_token_forces_eos(self):
        model = FakeSamplerModel(is_comprehension=False)
        vocab_size = 300
        logits = torch.randn(1, vocab_size)
        min_score = torch.finfo(logits.dtype).min

        # Simulate the sampler's post-ratio mask: everything to min_score,
        # with EOS pinned at 0 so greedy decoding must emit EOS next.
        logits[0].fill_(min_score)
        logits[0, model._eos_token_id] = 0

        assert logits[0, EOS].item() == 0
        # Every non-EOS token, below and above EOS, must be masked out.
        non_eos = torch.cat([logits[0, :EOS], logits[0, EOS + 1 :]])
        assert non_eos.max().item() == min_score
2 changes: 1 addition & 1 deletion tests/e2e/offline_inference/test_hunyuanimage3_text2img.py
@@ -17,7 +17,7 @@
MODEL_NAME = "tencent/HunyuanImage-3.0"
LOCAL_CLIP_PATH = "openai/clip-vit-base-patch32"
REPO_ROOT = Path(__file__).resolve().parents[3]
-STAGE_CONFIG_PATH = REPO_ROOT / "vllm_omni" / "model_executor" / "stage_configs" / "hunyuan_image3_moe.yaml"
+STAGE_CONFIG_PATH = REPO_ROOT / "vllm_omni" / "model_executor" / "stage_configs" / "hunyuan_image3_t2i.yaml"
Collaborator:

hunyuan_image3_t2i.yaml doesn't exist in this PR. The deleted hunyuan_image3_moe.yaml is replaced by i2t/it2i/t2t configs, but there's no t2i config for pure text-to-image. This test will fail with FileNotFoundError.

Contributor Author (@TaffyOfficial, Apr 15, 2026):

[image attachment]

This file has been merged into the library along with #2712.


pytestmark = [pytest.mark.advanced_model, pytest.mark.diffusion]
