Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
35 commits
Select commit Hold shift + click to select a range
1486f39
[Fix] enforce Qwen-Image max_sequence_length before encoding
david6666666 Apr 16, 2026
efde282
[Fix] enforce Wan2.2 max_sequence_length before encoding
david6666666 Apr 16, 2026
cae196a
[Fix] apply pre-commit formatting
david6666666 Apr 16, 2026
0ec989b
[Refactor] merge diffusion prompt length validators
david6666666 Apr 16, 2026
973adde
[Fix] exclude Qwen template overhead from prompt length checks
david6666666 Apr 16, 2026
1f190d9
[Docs] annotate Qwen prompt length validation invariants
david6666666 Apr 16, 2026
7eeafcf
[Test] add pytest marks for max sequence UTs
david6666666 Apr 17, 2026
b63a3b7
[Bugfix] Preserve default diffusion sampling params in default stage
david6666666 Apr 14, 2026
ee577d7
Fix Qwen image edit plus prompt encoding memory
david6666666 Apr 16, 2026
4abda0c
Clamp Qwen image edit plus prompt length
david6666666 Apr 16, 2026
d8f0ff9
Limit Qwen image edit plus input images
david6666666 Apr 16, 2026
2596b22
Translate omni image edit input errors
david6666666 Apr 16, 2026
a1b3e50
Validate image edit limits in API layer
david6666666 Apr 16, 2026
a8763b3
Refine image edit input limit helpers
david6666666 Apr 16, 2026
09d8847
Simplify image edit input limit helper
david6666666 Apr 16, 2026
9acab70
Wrap image edit limit error message
david6666666 Apr 16, 2026
518640d
Use model-specific image edit limits
david6666666 Apr 16, 2026
4814a9a
Reject over-limit image edits before loading
david6666666 Apr 16, 2026
854fad6
Document image edit limit handling
david6666666 Apr 16, 2026
1a9dbd3
[Fix] Make image edit input limits config-driven
david6666666 Apr 17, 2026
c2a0230
[Refactor] Move diffusion image limits into shared metadata
david6666666 Apr 17, 2026
125dc9d
[Fix] Apply pre-commit import ordering
david6666666 Apr 17, 2026
13ca70f
[Docs] Clarify shared diffusion image limit metadata
david6666666 Apr 17, 2026
bbaf682
Fix RIFE device selection for CPU transport
david6666666 Apr 17, 2026
beeb333
[Chore] run pre-commit formatting
david6666666 Apr 17, 2026
d7233cb
[Fix] align Wan2.2 max_sequence_length with model config
david6666666 Apr 17, 2026
25ac7cd
[Fix] raise Wan2.2 max_sequence_length to 2048
david6666666 Apr 17, 2026
67e52e8
[Chore] run pre-commit after PR2877 backport
david6666666 Apr 17, 2026
5041f7e
Fix async video job profiler metadata persistence
david6666666 Apr 17, 2026
6e87f20
Revert "Fix async video job profiler metadata persistence"
david6666666 Apr 17, 2026
f0c4c1f
Fix release backport unit regressions
david6666666 Apr 17, 2026
210bb37
Revert "Fix release backport unit regressions"
david6666666 Apr 17, 2026
c3540ee
Fix 400 status for prompt length validation
david6666666 Apr 18, 2026
5be6ff5
[Fix] avoid padding short Wan2.2 prompts to max_sequence_length
david6666666 Apr 19, 2026
7930665
Merge branch 'release/v0.18.0.post1' into codex/release-v0.18.0.post1…
david6666666 Apr 20, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
38 changes: 38 additions & 0 deletions tests/diffusion/models/qwen_image/test_qwen_image_edit_plus.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
# SPDX-License-Identifier: Apache-2.0

import json
from pathlib import Path
from types import SimpleNamespace

import numpy as np
import pytest
from PIL import Image

from vllm_omni.diffusion.models.qwen_image.pipeline_qwen_image_edit_plus import (
get_qwen_image_edit_plus_pre_process_func,
)

pytestmark = [pytest.mark.core_model, pytest.mark.diffusion, pytest.mark.cpu]


def test_qwen_image_edit_plus_rejects_too_many_input_images(tmp_path: Path):
vae_dir = tmp_path / "vae"
vae_dir.mkdir()
# Keep the mock config intentionally minimal: this test only needs the
# fields touched during pre-process initialization.
(vae_dir / "config.json").write_text(json.dumps({"z_dim": 16}))

pre_process = get_qwen_image_edit_plus_pre_process_func(SimpleNamespace(model=str(tmp_path)))
image = Image.fromarray(np.zeros((32, 32, 3), dtype=np.uint8))
request = SimpleNamespace(
prompts=[
{
"prompt": "combine",
"multi_modal_data": {"image": [image, image, image, image, image]},
}
],
sampling_params=SimpleNamespace(height=None, width=None),
)

with pytest.raises(ValueError, match=r"At most 4 images are supported by this model"):
pre_process(request)
Original file line number Diff line number Diff line change
@@ -0,0 +1,260 @@
import inspect
from types import SimpleNamespace

import pytest
import torch
from torch import nn

from vllm_omni.diffusion.models.qwen_image.pipeline_qwen_image import (
QwenImagePipeline,
)
from vllm_omni.diffusion.models.qwen_image.pipeline_qwen_image_edit import (
QwenImageEditPipeline,
)
from vllm_omni.diffusion.models.qwen_image.pipeline_qwen_image_edit_plus import (
QwenImageEditPlusPipeline,
)
from vllm_omni.diffusion.models.qwen_image.pipeline_qwen_image_layered import (
QwenImageLayeredPipeline,
)

pytestmark = [pytest.mark.core_model, pytest.mark.cpu]


class _RejectingTextEncoder:
dtype = torch.float32

def __call__(self, *args, **kwargs):
raise AssertionError("text encoder should not run for prompts that exceed max_sequence_length")


class _FakeModelInputs:
def __init__(self, total_sequence_length: int):
attention_mask = torch.ones((1, total_sequence_length), dtype=torch.long)
self.input_ids = attention_mask.clone()
self.attention_mask = attention_mask
self.pixel_values = None
self.image_grid_thw = None

def to(self, device):
return self


class _FakeTokenizer:
def __init__(self, total_sequence_length: int | list[int]):
if isinstance(total_sequence_length, list):
self.total_sequence_lengths = list(total_sequence_length)
else:
self.total_sequence_lengths = [total_sequence_length]

def __call__(self, *args, **kwargs):
if len(self.total_sequence_lengths) > 1:
total_sequence_length = self.total_sequence_lengths.pop(0)
else:
total_sequence_length = self.total_sequence_lengths[0]
return _FakeModelInputs(total_sequence_length)


class _FakeProcessor(_FakeTokenizer):
pass


class _FakeScheduler:
def __init__(self):
self.begin_index = None

def set_begin_index(self, begin_index: int):
self.begin_index = begin_index


PIPELINE_CASES = [
pytest.param(QwenImagePipeline, 34, "tokenizer", id="qwen-image"),
pytest.param(QwenImageLayeredPipeline, 34, "tokenizer", id="qwen-image-layered"),
pytest.param(QwenImageEditPipeline, 64, "processor", id="qwen-image-edit"),
pytest.param(QwenImageEditPlusPipeline, 64, "processor", id="qwen-image-edit-plus"),
]


def _make_pipeline(
pipeline_class: type,
*,
total_sequence_length: int,
drop_idx: int,
input_kind: str,
):
pipeline = object.__new__(pipeline_class)
nn.Module.__init__(pipeline)
pipeline.device = torch.device("cpu")
pipeline.text_encoder = _RejectingTextEncoder()
pipeline.tokenizer_max_length = 1024
pipeline.prompt_template_encode = "{}"
pipeline.prompt_template_encode_start_idx = drop_idx
pipeline.tokenizer = _FakeTokenizer([total_sequence_length, 0])
if input_kind == "processor":
pipeline.processor = _FakeProcessor(total_sequence_length)
return pipeline


@pytest.mark.parametrize(("pipeline_class", "drop_idx", "input_kind"), PIPELINE_CASES)
def test_encode_prompt_rejects_prompt_longer_than_default_max_sequence_length(
pipeline_class: type,
drop_idx: int,
input_kind: str,
):
pipeline = _make_pipeline(
pipeline_class,
total_sequence_length=1025,
drop_idx=drop_idx,
input_kind=input_kind,
)

with pytest.raises(ValueError, match=r"got 1025 tokens, but `max_sequence_length` is 1024"):
pipeline.encode_prompt(prompt="prompt")


@pytest.mark.parametrize(("pipeline_class", "drop_idx", "input_kind"), PIPELINE_CASES)
def test_encode_prompt_rejects_prompt_longer_than_explicit_max_sequence_length(
pipeline_class: type,
drop_idx: int,
input_kind: str,
):
pipeline = _make_pipeline(
pipeline_class,
total_sequence_length=17,
drop_idx=drop_idx,
input_kind=input_kind,
)

with pytest.raises(ValueError, match=r"got 17 tokens, but `max_sequence_length` is 16"):
pipeline.encode_prompt(prompt="prompt", max_sequence_length=16)


def test_prepare_encode_defaults_to_tokenizer_max_length():
pipeline = object.__new__(QwenImagePipeline)
nn.Module.__init__(pipeline)
pipeline.tokenizer_max_length = 1024
pipeline.vae_scale_factor = 8
pipeline.default_sample_size = 128
pipeline.scheduler = _FakeScheduler()
pipeline._extract_prompts = lambda prompts: (["prompt"], None)

captured = {}

def _fake_prepare_generation_context(**kwargs):
captured["max_sequence_length"] = kwargs["max_sequence_length"]
embeds = torch.ones((1, 1, 1))
mask = torch.ones((1, 1), dtype=torch.long)
return {
"prompt_embeds": embeds,
"prompt_embeds_mask": mask,
"negative_prompt_embeds": None,
"negative_prompt_embeds_mask": None,
"latents": embeds,
"timesteps": torch.tensor([1]),
"do_true_cfg": False,
"guidance": None,
"img_shapes": [[(1, 1, 1)]],
"txt_seq_lens": [1],
"negative_txt_seq_lens": None,
}

pipeline._prepare_generation_context = _fake_prepare_generation_context
state = SimpleNamespace(
prompts=["prompt"],
sampling=SimpleNamespace(
height=None,
width=None,
num_inference_steps=None,
sigmas=None,
guidance_scale_provided=False,
num_outputs_per_prompt=0,
generator=None,
true_cfg_scale=None,
max_sequence_length=None,
),
)

pipeline.prepare_encode(state)

assert captured["max_sequence_length"] == 1024


@pytest.mark.parametrize(
("pipeline_class", "drop_idx"),
[
pytest.param(QwenImageEditPipeline, 64, id="qwen-image-edit"),
pytest.param(QwenImageEditPlusPipeline, 64, id="qwen-image-edit-plus"),
],
)
def test_edit_pipelines_validate_text_prompt_length_before_image_token_expansion(
pipeline_class: type,
drop_idx: int,
):
pipeline = object.__new__(pipeline_class)
nn.Module.__init__(pipeline)
pipeline.device = torch.device("cpu")
pipeline.text_encoder = _RejectingTextEncoder()
pipeline.tokenizer_max_length = 1024
pipeline.prompt_template_encode = "{}"
pipeline.prompt_template_encode_start_idx = drop_idx
pipeline.tokenizer = _FakeTokenizer([8, 0])
pipeline.processor = _FakeProcessor(drop_idx + 1500)

with pytest.raises(AssertionError, match="text encoder should not run"):
pipeline.encode_prompt(prompt="short prompt")


@pytest.mark.parametrize(
"pipeline_class",
[
pytest.param(QwenImagePipeline, id="qwen-image"),
pytest.param(QwenImageLayeredPipeline, id="qwen-image-layered"),
],
)
def test_qwen_generation_validator_excludes_template_suffix_from_budget(pipeline_class: type):
pipeline = object.__new__(pipeline_class)
nn.Module.__init__(pipeline)
pipeline.device = torch.device("cpu")
pipeline.text_encoder = _RejectingTextEncoder()
pipeline.tokenizer_max_length = 1024
pipeline.prompt_template_encode = "{}"
pipeline.prompt_template_encode_start_idx = 34
pipeline.tokenizer = _FakeTokenizer([1029, 5])

with pytest.raises(AssertionError, match="text encoder should not run"):
pipeline.encode_prompt(prompt="boundary prompt")


@pytest.mark.parametrize(
"pipeline_class",
[
pytest.param(QwenImageEditPipeline, id="qwen-image-edit"),
pytest.param(QwenImageEditPlusPipeline, id="qwen-image-edit-plus"),
],
)
def test_qwen_edit_validator_excludes_image_placeholders_from_budget(pipeline_class: type):
pipeline = object.__new__(pipeline_class)
nn.Module.__init__(pipeline)
pipeline.device = torch.device("cpu")
pipeline.text_encoder = _RejectingTextEncoder()
pipeline.tokenizer_max_length = 1024
pipeline.prompt_template_encode = "{}"
pipeline.prompt_template_encode_start_idx = 64
pipeline.tokenizer = _FakeTokenizer([30, 20])
pipeline.processor = _FakeProcessor(1500)

with pytest.raises(AssertionError, match="text encoder should not run"):
pipeline.encode_prompt(prompt="short prompt")


@pytest.mark.parametrize(
"pipeline_class",
[
QwenImagePipeline,
QwenImageLayeredPipeline,
QwenImageEditPipeline,
QwenImageEditPlusPipeline,
],
)
def test_forward_max_sequence_length_default_is_1024(pipeline_class: type):
assert inspect.signature(pipeline_class.forward).parameters["max_sequence_length"].default == 1024
Loading