Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
44 changes: 44 additions & 0 deletions .buildkite/test-nightly.yml
Original file line number Diff line number Diff line change
Expand Up @@ -446,6 +446,50 @@ steps:
path: /mnt/hf-cache
type: DirectoryOrCreate

# Nightly e2e step for HunyuanImage-3.0-Instruct.
# Runs the I2T test file and then the T2I test file inside a single `bash -c` script;
# `set -e` stops the script at the first failing pytest run, so T2I only runs if I2T passed.
# The inner `timeout 55m` is shorter than the 60-minute step timeout declared below.
# The pod requests 4 GPUs, mounts the host directory /mnt/hf-cache as HF_HOME, and reads
# HF_TOKEN from the `hf-token-secret` Kubernetes secret.
- label: ":full_moon: Diffusion X2I(&A&T) · HunyuanImage-3.0-Instruct e2e"
timeout_in_minutes: 60
commands:
- |
timeout 55m bash -c '
set -e
export VLLM_TEST_CLEAN_GPU_MEMORY=1
pytest -s -v tests/e2e/offline_inference/test_hunyuanimage3_i2t.py -m "advanced_model" --run-level "advanced_model"
pytest -s -v tests/e2e/offline_inference/test_hunyuanimage3_t2i.py -m "advanced_model" --run-level "advanced_model"
'
agents:
queue: "mithril-h100-pool"
plugins:
- kubernetes:
podSpec:
containers:
- image: 936637512419.dkr.ecr.us-west-2.amazonaws.com/vllm-ci-pull-through-cache/q9t5s3a7/vllm-ci-test-repo:$BUILDKITE_COMMIT
resources:
limits:
nvidia.com/gpu: 4
volumeMounts:
- name: devshm
mountPath: /dev/shm
- name: hf-cache
mountPath: /root/.cache/huggingface
env:
- name: HF_HOME
value: /root/.cache/huggingface
- name: HF_TOKEN
valueFrom:
secretKeyRef:
name: hf-token-secret
key: token
nodeSelector:
node.kubernetes.io/instance-type: gpu-h100-sxm
volumes:
- name: devshm
emptyDir:
medium: Memory
- name: hf-cache
hostPath:
path: /mnt/hf-cache
type: DirectoryOrCreate

- label: ":full_moon: Diffusion X2I(&A&T) · Perf Test"
key: nightly-diffusion-x2iat-performance
timeout_in_minutes: 180
Expand Down
77 changes: 77 additions & 0 deletions tests/e2e/offline_inference/test_hunyuanimage3_i2t.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,77 @@
# SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
# ruff: noqa: E402
"""Smoke test for HunyuanImage-3.0 Image-to-Text (I2T) pipeline."""

import sys
from collections.abc import Generator
from pathlib import Path

import pytest
import torch

from vllm_omni import Omni

# Model identifier passed to `Omni(model=...)` by the `omni` fixture.
MODEL_NAME = "tencent/HunyuanImage-3.0-Instruct"
# This file sits in tests/e2e/offline_inference/, so parents[3] is the repository root.
REPO_ROOT = Path(__file__).resolve().parents[3]
# Stage configuration YAML handed to `Omni(stage_configs_path=...)` for the I2T pipeline.
STAGE_CONFIG_PATH = REPO_ROOT / "vllm_omni" / "model_executor" / "stage_configs" / "hunyuan_image3_i2t.yaml"

# Longest stable prefix shared by HF greedy reference and vllm-omni AR output on
# this input (verified 2026-05-04 via scripts/bench/hf_i2t_pr2986_baseline.py +
# vllm_omni_i2t_pr2986_check.py). vllm-omni vs HF is not bitwise-alignable past
# this point — see memory/hf/hf_omni_alignment_method.md.
EXPECTED_PREFIX = "The image is a solid"

# Allow importing end2end from examples.
# The sys.path entry must be inserted before the import below, which is why the
# file carries a module-level `ruff: noqa: E402`.
sys.path.insert(0, str(REPO_ROOT / "examples" / "offline_inference" / "hunyuan_image3"))
from end2end import build_prompt

# Markers applied to every test in this module; the nightly CI step selects on
# `advanced_model`.
pytestmark = [pytest.mark.advanced_model, pytest.mark.diffusion]


@pytest.fixture(scope="module")
def omni() -> Generator[Omni, None, None]:
    """Provide one I2T ``Omni`` engine shared by every test in this module.

    The engine is constructed from ``MODEL_NAME`` and ``STAGE_CONFIG_PATH`` and
    is always closed when the fixture is torn down.
    """
    engine_kwargs = {
        "model": MODEL_NAME,
        "stage_configs_path": str(STAGE_CONFIG_PATH),
        "stage_init_timeout": 600,
        "init_timeout": 900,
    }
    pipeline = Omni(**engine_kwargs)
    try:
        yield pipeline
    finally:
        # Release the engine even if teardown is reached via an error path.
        pipeline.close()


@pytest.mark.skipif(torch.cuda.device_count() < 4, reason="Need at least 4 CUDA GPUs.")
def test_i2t_generates_text(omni: Omni) -> None:
    """Check that the I2T text output begins with ``EXPECTED_PREFIX``."""
    from PIL import Image

    # A synthetic single-colour image keeps the input self-contained and reproducible.
    solid_image = Image.new("RGB", (256, 256), color=(128, 200, 100))
    request = {
        "prompt": build_prompt("Describe the content of the picture.", task="i2t"),
        "modalities": ["text"],
        "multi_modal_data": {"image": solid_image},
    }

    outputs = omni.generate(prompts=[request])
    assert outputs, "No outputs returned from Omni.generate()"

    # Use the wrapped `request_output` when present, otherwise the output object itself.
    head = outputs[0]
    request_output = getattr(head, "request_output", head)
    assert request_output.outputs, "No completion outputs"

    generated_text = request_output.outputs[0].text
    assert isinstance(generated_text, str), f"Expected str, got {type(generated_text)}"

    # Length is checked separately so a truncated output gets its own failure message.
    n = len(EXPECTED_PREFIX)
    assert len(generated_text) >= n, (
        f"AR output shorter than {n} chars (got {len(generated_text)}): {generated_text!r}"
    )
    assert generated_text.startswith(EXPECTED_PREFIX), (
        f"AR prefix drift vs HF reference\n"
        f" expected: {EXPECTED_PREFIX!r}\n"
        f" actual : {generated_text[:n]!r}\n"
        f" full : {generated_text!r}"
    )
69 changes: 69 additions & 0 deletions tests/e2e/offline_inference/test_hunyuanimage3_t2i.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,69 @@
# SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
# ruff: noqa: E402
"""Smoke test for HunyuanImage-3.0 Text-to-Image (T2I) pipeline."""

import sys
from collections.abc import Generator
from pathlib import Path

import pytest
import torch
from PIL import Image

from vllm_omni import Omni
from vllm_omni.inputs.data import OmniDiffusionSamplingParams

# Model identifier passed to `Omni(model=...)` by the `omni` fixture.
MODEL_NAME = "tencent/HunyuanImage-3.0-Instruct"
# This file sits in tests/e2e/offline_inference/, so parents[3] is the repository root.
REPO_ROOT = Path(__file__).resolve().parents[3]
# Stage configuration YAML handed to `Omni(stage_configs_path=...)` for the T2I pipeline.
STAGE_CONFIG_PATH = REPO_ROOT / "vllm_omni" / "model_executor" / "stage_configs" / "hunyuan_image3_t2i.yaml"

# Make the example script importable; the sys.path entry must be inserted before
# the import below, which is why the file carries a module-level `ruff: noqa: E402`.
sys.path.insert(0, str(REPO_ROOT / "examples" / "offline_inference" / "hunyuan_image3"))
from end2end import build_prompt

# Markers applied to every test in this module; the nightly CI step selects on
# `advanced_model`.
pytestmark = [pytest.mark.advanced_model, pytest.mark.diffusion]


@pytest.fixture(scope="module")
def omni() -> Generator[Omni, None, None]:
    """Provide one T2I ``Omni`` engine shared by every test in this module.

    The engine is constructed from ``MODEL_NAME`` and ``STAGE_CONFIG_PATH`` and
    is always closed when the fixture is torn down.
    """
    engine_kwargs = {
        "model": MODEL_NAME,
        "stage_configs_path": str(STAGE_CONFIG_PATH),
        "stage_init_timeout": 600,
        "init_timeout": 900,
    }
    pipeline = Omni(**engine_kwargs)
    try:
        yield pipeline
    finally:
        # Release the engine even if teardown is reached via an error path.
        pipeline.close()


@pytest.mark.skipif(torch.cuda.device_count() < 4, reason="Need at least 4 CUDA GPUs.")
def test_t2i_generates_image(omni: Omni) -> None:
    """Check that the T2I pipeline returns at least one non-empty PIL image."""
    diffusion_params = OmniDiffusionSamplingParams(seed=1234, num_outputs_per_prompt=1)
    text_prompt = build_prompt("A brown and white dog is running on the grass", task="t2i_think")

    request = {"prompt": text_prompt, "modalities": ["image"]}
    outputs = omni.generate(request, diffusion_params)
    assert outputs, "No outputs returned from Omni.generate()"

    result = outputs[0]
    images = getattr(result, "images", None)
    if images is None:
        # Images were not on the top-level output; look on the wrapped request_output.
        wrapped = getattr(result, "request_output", None)
        assert wrapped is not None, "No request_output in Omni output"
        images = getattr(wrapped, "images", None)

    assert images and len(images) > 0, "No image was generated by the T2I pipeline"
    assert isinstance(images[0], Image.Image), f"Expected PIL Image, got {type(images[0])}"

    width, height = images[0].size
    assert width > 0 and height > 0, "Generated image has zero dimensions"
Loading