vllm-project · TaffyOfficial · Apr 21, 2026 · May 4, 2026
@@ -446,6 +446,50 @@ steps:
                       path: /mnt/hf-cache
                       type: DirectoryOrCreate
 
+      # End-to-end step for HunyuanImage-3.0-Instruct: runs the I2T test file, then the T2I test file.
+      - label: ":full_moon: Diffusion X2I(&A&T) · HunyuanImage-3.0-Instruct e2e"
+        # Step-level limit; the inner `timeout 55m` below is shorter, so the shell is stopped first.
+        timeout_in_minutes: 60
+        # `set -e` aborts the script at the first failing command, so the T2I run is
+        # skipped when the I2T run fails.
+        commands:
+          - |
+            timeout 55m bash -c '
+              set -e
+              export VLLM_TEST_CLEAN_GPU_MEMORY=1
+              pytest -s -v tests/e2e/offline_inference/test_hunyuanimage3_i2t.py -m "advanced_model" --run-level "advanced_model"
+              pytest -s -v tests/e2e/offline_inference/test_hunyuanimage3_t2i.py -m "advanced_model" --run-level "advanced_model"
+            '
+        agents:
+          queue: "mithril-h100-pool"
+        plugins:
+          - kubernetes:
+              podSpec:
+                containers:
+                  # Test image tag is taken from $BUILDKITE_COMMIT.
+                  - image: 936637512419.dkr.ecr.us-west-2.amazonaws.com/vllm-ci-pull-through-cache/q9t5s3a7/vllm-ci-test-repo:$BUILDKITE_COMMIT
+                    resources:
+                      limits:
+                        # Both test files skip themselves when fewer than 4 CUDA devices are visible.
+                        nvidia.com/gpu: 4
+                    volumeMounts:
+                      - name: devshm
+                        mountPath: /dev/shm
+                      - name: hf-cache
+                        mountPath: /root/.cache/huggingface
+                    env:
+                      # HF_HOME points at the hf-cache mount path declared above.
+                      - name: HF_HOME
+                        value: /root/.cache/huggingface
+                      - name: HF_TOKEN
+                        valueFrom:
+                          secretKeyRef:
+                            name: hf-token-secret
+                            key: token
+                nodeSelector:
+                  node.kubernetes.io/instance-type: gpu-h100-sxm
+                volumes:
+                  # Memory-backed emptyDir mounted at /dev/shm.
+                  - name: devshm
+                    emptyDir:
+                      medium: Memory
+                  # Host directory (created if absent) backing the Hugging Face cache mount.
+                  - name: hf-cache
+                    hostPath:
+                      path: /mnt/hf-cache
+                      type: DirectoryOrCreate
+
       - label: ":full_moon: Diffusion X2I(&A&T) · Perf Test"
         key: nightly-diffusion-x2iat-performance
         timeout_in_minutes: 180

@@ -0,0 +1,77 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+# ruff: noqa: E402
+"""Smoke test for HunyuanImage-3.0 Image-to-Text (I2T) pipeline."""
+
+import sys
+from collections.abc import Generator
+from pathlib import Path
+
+import pytest
+import torch
+
+from vllm_omni import Omni
+
+MODEL_NAME = "tencent/HunyuanImage-3.0-Instruct"
+REPO_ROOT = Path(__file__).resolve().parents[3]
+STAGE_CONFIG_PATH = REPO_ROOT / "vllm_omni" / "model_executor" / "stage_configs" / "hunyuan_image3_i2t.yaml"
+
+# Longest stable prefix shared by HF greedy reference and vllm-omni AR output on
+# this input (verified 2026-05-04 via scripts/bench/hf_i2t_pr2986_baseline.py +
+# vllm_omni_i2t_pr2986_check.py). vllm-omni vs HF is not bitwise-alignable past
+# this point — see memory/hf/hf_omni_alignment_method.md.
+EXPECTED_PREFIX = "The image is a solid"
+
+# Allow importing end2end from examples
+sys.path.insert(0, str(REPO_ROOT / "examples" / "offline_inference" / "hunyuan_image3"))
+from end2end import build_prompt
+
+pytestmark = [pytest.mark.advanced_model, pytest.mark.diffusion]
+
+
+@pytest.fixture(scope="module")
+def omni() -> Generator[Omni, None, None]:
+    engine = Omni(
+        model=MODEL_NAME,
+        stage_configs_path=str(STAGE_CONFIG_PATH),
+        stage_init_timeout=600,
+        init_timeout=900,
+    )
+    try:
+        yield engine
+    finally:
+        engine.close()
+
+
+@pytest.mark.skipif(torch.cuda.device_count() < 4, reason="Need at least 4 CUDA GPUs.")
+def test_i2t_generates_text(omni: Omni) -> None:
+    """Verify I2T output starts with the HF-aligned 20-char prefix `EXPECTED_PREFIX`."""
+    # Solid-color image keeps the input self-contained and reproducible.
+    from PIL import Image
+
+    input_image = Image.new("RGB", (256, 256), color=(128, 200, 100))
+
+    prompt = build_prompt("Describe the content of the picture.", task="i2t")
+    prompt_dict = {
+        "prompt": prompt,
+        "modalities": ["text"],
+        "multi_modal_data": {"image": input_image},
+    }
+
+    outputs = omni.generate(prompts=[prompt_dict])
+    assert outputs, "No outputs returned from Omni.generate()"
+
+    first_output = outputs[0]
+    request_output = getattr(first_output, "request_output", first_output)
+    assert request_output.outputs, "No completion outputs"
+
+    generated_text = request_output.outputs[0].text
+    assert isinstance(generated_text, str), f"Expected str, got {type(generated_text)}"
+    n = len(EXPECTED_PREFIX)
+    assert len(generated_text) >= n, f"AR output shorter than {n} chars (got {len(generated_text)}): {generated_text!r}"
+    assert generated_text[:n] == EXPECTED_PREFIX, (
+        f"AR prefix drift vs HF reference\n"
+        f"  expected: {EXPECTED_PREFIX!r}\n"
+        f"  actual  : {generated_text[:n]!r}\n"
+        f"  full    : {generated_text!r}"
+    )
@@ -0,0 +1,69 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+# ruff: noqa: E402
+"""Smoke test for HunyuanImage-3.0 Text-to-Image (T2I) pipeline."""
+
+import sys
+from collections.abc import Generator
+from pathlib import Path
+
+import pytest
+import torch
+from PIL import Image
+
+from vllm_omni import Omni
+from vllm_omni.inputs.data import OmniDiffusionSamplingParams
+
+MODEL_NAME = "tencent/HunyuanImage-3.0-Instruct"
+REPO_ROOT = Path(__file__).resolve().parents[3]
+STAGE_CONFIG_PATH = REPO_ROOT / "vllm_omni" / "model_executor" / "stage_configs" / "hunyuan_image3_t2i.yaml"
+
+sys.path.insert(0, str(REPO_ROOT / "examples" / "offline_inference" / "hunyuan_image3"))
+from end2end import build_prompt
+
+pytestmark = [pytest.mark.advanced_model, pytest.mark.diffusion]
+
+
+@pytest.fixture(scope="module")
+def omni() -> Generator[Omni, None, None]:
+    engine = Omni(
+        model=MODEL_NAME,
+        stage_configs_path=str(STAGE_CONFIG_PATH),
+        stage_init_timeout=600,
+        init_timeout=900,
+    )
+    try:
+        yield engine
+    finally:
+        engine.close()
+
+
+@pytest.mark.skipif(torch.cuda.device_count() < 4, reason="Need at least 4 CUDA GPUs.")
+def test_t2i_generates_image(omni: Omni) -> None:
+    """Verify that the T2I pipeline produces a PIL Image output."""
+    sampling_params = OmniDiffusionSamplingParams(
+        seed=1234,
+        num_outputs_per_prompt=1,
+    )
+
+    prompt = build_prompt(
+        "A brown and white dog is running on the grass",
+        task="t2i_think",
+    )
+
+    outputs = omni.generate(
+        {"prompt": prompt, "modalities": ["image"]},
+        sampling_params,
+    )
+    assert outputs, "No outputs returned from Omni.generate()"
+
+    first_output = outputs[0]
+    images = getattr(first_output, "images", None)
+    if images is None:
+        request_output = getattr(first_output, "request_output", None)
+        assert request_output is not None, "No request_output in Omni output"
+        images = getattr(request_output, "images", None)
+
+    assert images and len(images) > 0, "No image was generated by the T2I pipeline"
+    assert isinstance(images[0], Image.Image), f"Expected PIL Image, got {type(images[0])}"
+    assert images[0].size[0] > 0 and images[0].size[1] > 0, "Generated image has zero dimensions"