Merged
29 commits
7532a20
[Renderer] Move InputPreprocessor into Renderer (2/2)
DarkLight1337 Feb 14, 2026
f644cc2
mypy
DarkLight1337 Feb 14, 2026
3f9b557
Merge branch 'main' into mv-mm-processor-2
DarkLight1337 Feb 15, 2026
a438dc4
mypy
DarkLight1337 Feb 15, 2026
f26d529
Fix
DarkLight1337 Feb 15, 2026
98c7515
Update
DarkLight1337 Feb 15, 2026
4d89390
Fix imports
DarkLight1337 Feb 15, 2026
4054307
Remove duplicate prompt text extraction
DarkLight1337 Feb 15, 2026
a775af0
Fix
DarkLight1337 Feb 15, 2026
b1a5b7a
Redundant code
DarkLight1337 Feb 15, 2026
494ea05
Merge branch 'main' into mv-mm-processor-2
DarkLight1337 Feb 16, 2026
4eb4044
Remove skips
DarkLight1337 Feb 16, 2026
67c4e64
Fix beam search and factor out `_validate_and_run_requests`
DarkLight1337 Feb 16, 2026
f1e34ab
Fix audio replacement
DarkLight1337 Feb 16, 2026
ccc1551
Fix progress bar
DarkLight1337 Feb 16, 2026
1e60a00
Remove print
DarkLight1337 Feb 16, 2026
8b7e4f4
Fix?
DarkLight1337 Feb 16, 2026
c30e184
Improve log
DarkLight1337 Feb 16, 2026
80ef643
Simplify
DarkLight1337 Feb 16, 2026
7f79599
Fix
DarkLight1337 Feb 16, 2026
2b4fb63
mypy
DarkLight1337 Feb 16, 2026
e9c045c
Fix
DarkLight1337 Feb 16, 2026
e8b98b2
Fixes
DarkLight1337 Feb 16, 2026
98950e5
Fix UUIDs
DarkLight1337 Feb 16, 2026
209df9a
More fixes; move to renderer test
DarkLight1337 Feb 16, 2026
4d10352
Fix
DarkLight1337 Feb 17, 2026
eb2b8ea
Merge branch 'main' into mv-mm-processor-2
DarkLight1337 Feb 17, 2026
701a29e
Fix wrong request ID
DarkLight1337 Feb 17, 2026
b7d700b
Additional check
DarkLight1337 Feb 17, 2026
15 changes: 6 additions & 9 deletions tests/entrypoints/llm/test_chat.py
@@ -195,18 +195,15 @@ def test_chat_batch_failure_cleanup(llm_for_failure_test):
     valid_msg = [{"role": "user", "content": "Hello"}]
     long_text = "This is a very long text to test the error " * 50
     invalid_msg = [{"role": "user", "content": long_text}]
-    batch_1 = [
-        valid_msg,
-        valid_msg,
-        invalid_msg,
-    ]
-    batch_2 = [
-        valid_msg,
-        valid_msg,
-    ]
 
+    batch_1 = [valid_msg, valid_msg, invalid_msg]
+    batch_2 = [valid_msg, valid_msg]
     sampling_params = SamplingParams(temperature=0, max_tokens=10)
 
     with pytest.raises(ValueError, match="context length is only"):
         llm.chat(batch_1, sampling_params=sampling_params)
     assert llm.llm_engine.get_num_unfinished_requests() == 0
 
     outputs_2 = llm.chat(batch_2, sampling_params=sampling_params)
     assert len(outputs_2) == len(batch_2)
     assert llm.llm_engine.get_num_unfinished_requests() == 0
5 changes: 3 additions & 2 deletions tests/models/multimodal/processing/test_common.py
@@ -489,8 +489,9 @@ def _assert_inputs_equal(
     if ignore_mm_keys is None:
         ignore_mm_keys = set()
 
-    a_rest = {k: v for k, v in a.items() if k != "mm_kwargs"}
-    b_rest = {k: v for k, v in b.items() if k != "mm_kwargs"}
+    ignore_prompt_keys = ("prompt", "mm_kwargs")
+    a_rest = {k: v for k, v in a.items() if k not in ignore_prompt_keys}
+    b_rest = {k: v for k, v in b.items() if k not in ignore_prompt_keys}
 
     assert a_rest == b_rest, msg

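For reference, the change above compares processed inputs while skipping keys whose values are allowed to differ (the raw prompt text and the multi-modal kwargs). A minimal standalone sketch of the same filtered-comparison pattern, with a hypothetical helper name not taken from vLLM:

# Hypothetical sketch of the key-filtering comparison used in
# _assert_inputs_equal above; assert_equal_ignoring is an illustrative name.
def assert_equal_ignoring(a: dict, b: dict, ignored: tuple[str, ...]) -> None:
    a_rest = {k: v for k, v in a.items() if k not in ignored}
    b_rest = {k: v for k, v in b.items() if k not in ignored}
    assert a_rest == b_rest

# The prompts differ, but the comparison deliberately ignores them.
assert_equal_ignoring(
    {"prompt": "<image> hi", "prompt_token_ids": [1, 2]},
    {"prompt": "hi", "prompt_token_ids": [1, 2]},
    ignored=("prompt", "mm_kwargs"),
)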
165 changes: 165 additions & 0 deletions tests/renderers/test_process_multi_modal_uuids.py
@@ -0,0 +1,165 @@
# SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project

import pytest

from vllm.assets.image import ImageAsset
from vllm.assets.video import VideoAsset
from vllm.config import CacheConfig, ModelConfig, VllmConfig
from vllm.renderers.hf import HfRenderer
from vllm.tokenizers.registry import tokenizer_args_from_config

cherry_pil_image = ImageAsset("cherry_blossom").pil_image
stop_pil_image = ImageAsset("stop_sign").pil_image
baby_reading_np_ndarrays = VideoAsset("baby_reading").np_ndarrays


def _build_renderer(
    *, mm_cache_gb: float = 4.0, enable_prefix_caching: bool = True
) -> HfRenderer:
    model_config = ModelConfig(
        model="Qwen/Qwen2.5-VL-3B-Instruct",
        max_model_len=128,
        mm_processor_cache_gb=mm_cache_gb,
    )

    vllm_config = VllmConfig(
        model_config=model_config,
        cache_config=CacheConfig(enable_prefix_caching=enable_prefix_caching),
    )

    _, tokenizer_name, _, kwargs = tokenizer_args_from_config(model_config)

    return HfRenderer.from_config(
        vllm_config,
        tokenizer_kwargs={**kwargs, "tokenizer_name": tokenizer_name},
    )


def test_multi_modal_uuids_length_mismatch_raises():
    renderer = _build_renderer()

    mm_data = {"image": [cherry_pil_image, stop_pil_image]}

    # Mismatch: 2 items but only 1 uuid provided
    mm_uuids = {"image": ["hash_cherry"]}

    mm_processor = renderer.get_mm_processor()
    mm_items = mm_processor.info.parse_mm_data(mm_data)

    with pytest.raises(ValueError, match="must have same length as"):
        renderer._process_mm_uuids(mm_data, mm_items, mm_uuids, "req-1")


def test_multi_modal_uuids_missing_modality_raises():
    renderer = _build_renderer()

    mm_data = {
        "image": [cherry_pil_image],
        "video": None,
    }

    # Only image uuids are provided; the missing video entry should raise
    mm_uuids = {"image": ["hash_cherry"]}

    mm_processor = renderer.get_mm_processor()
    mm_items = mm_processor.info.parse_mm_data(mm_data)

    with pytest.raises(ValueError, match="is empty but .* is missing"):
        renderer._process_mm_uuids(mm_data, mm_items, mm_uuids, "req-2")


@pytest.mark.parametrize(
    "mm_cache_gb, enable_prefix_caching",
    [
        (4.0, True),  # default behavior
        (4.0, False),  # prefix caching disabled
        (0.0, True),  # processor cache disabled
    ],
)
def test_multi_modal_uuids_accepts_none_and_passes_through(
    monkeypatch, mm_cache_gb: float, enable_prefix_caching: bool
):
    renderer = _build_renderer(
        mm_cache_gb=mm_cache_gb,
        enable_prefix_caching=enable_prefix_caching,
    )

    mm_data = {
        "image": [cherry_pil_image, stop_pil_image],
        "video": baby_reading_np_ndarrays,
    }

    # Use a consistent two-image scenario across all configurations
    mm_uuids = {"image": [None, "hash_stop"], "video": None}

    mm_processor = renderer.get_mm_processor()
    mm_items = mm_processor.info.parse_mm_data(mm_data)
    processed_mm_uuids = renderer._process_mm_uuids(
        mm_data, mm_items, mm_uuids, "req-3"
    )

    assert processed_mm_uuids == mm_uuids


@pytest.mark.parametrize(
    "mm_cache_gb, enable_prefix_caching",
    [
        (4.0, True),  # default behavior
        (4.0, False),  # prefix caching disabled
        (0.0, True),  # processor cache disabled
    ],
)
def test_multi_modal_uuids_accepts_empty(
    monkeypatch, mm_cache_gb: float, enable_prefix_caching: bool
):
    renderer = _build_renderer(
        mm_cache_gb=mm_cache_gb,
        enable_prefix_caching=enable_prefix_caching,
    )

    # While None means a cached multi-modal input that requires UUIDs,
    # an empty list means no multi-modal input at all
    mm_data = {"image": [], "video": []}  # type: ignore[var-annotated]
    mm_uuids = {"image": [], "video": None}  # type: ignore[var-annotated]

    mm_processor = renderer.get_mm_processor()
    mm_items = mm_processor.info.parse_mm_data(mm_data)
    processed_mm_uuids = renderer._process_mm_uuids(
        mm_data, mm_items, mm_uuids, "req-4"
    )

    assert processed_mm_uuids == mm_uuids


def test_multi_modal_uuids_ignored_when_caching_disabled(monkeypatch):
    # When the processor cache is 0 and prefix caching is disabled, the
    # processor builds overrides from the request ID instead of using
    # the user-provided UUIDs.
    renderer = _build_renderer(mm_cache_gb=0.0, enable_prefix_caching=False)

    request_id = "req-42"
    mm_data = {
        "image": [cherry_pil_image, stop_pil_image],
        "video": baby_reading_np_ndarrays,
    }
    mm_uuids = {"image": ["hash_cherry", "hash_stop"], "video": ["hash_video"]}

    mm_processor = renderer.get_mm_processor()
    mm_items = mm_processor.info.parse_mm_data(mm_data)
    processed_mm_uuids = renderer._process_mm_uuids(
        mm_data, mm_items, mm_uuids, request_id
    )

    # Expect request-ID-based overrides to replace the user-provided UUIDs
    assert set(processed_mm_uuids.keys()) == {"image", "video"}
    assert len(processed_mm_uuids["image"]) == 2
    assert len(processed_mm_uuids["video"]) == 1
    assert processed_mm_uuids["image"][0].startswith(
        f"{request_id}-image-"
    ) and processed_mm_uuids["image"][0].endswith("-0")
    assert processed_mm_uuids["image"][1].startswith(
        f"{request_id}-image-"
    ) and processed_mm_uuids["image"][1].endswith("-1")
    assert processed_mm_uuids["video"][0].startswith(
        f"{request_id}-video-"
    ) and processed_mm_uuids["video"][0].endswith("-0")
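
The last test above depends on the renderer deriving UUID overrides from the request ID once both the processor cache and prefix caching are disabled. A minimal sketch of that derivation follows; the helper name and the exact "{request_id}-{modality}-{index}" format are assumptions inferred from the startswith/endswith assertions in the test, not vLLM's actual implementation:

from typing import Any

# Hypothetical sketch (not vLLM's real code): derive per-item UUID
# overrides from the request ID so that cache keys stay unique per request.
def build_request_id_overrides(
    mm_items: dict[str, list[Any]], request_id: str
) -> dict[str, list[str]]:
    return {
        modality: [f"{request_id}-{modality}-{i}" for i in range(len(items))]
        for modality, items in mm_items.items()
    }

# Example: two images and one video for "req-42" would yield
# {"image": ["req-42-image-0", "req-42-image-1"], "video": ["req-42-video-0"]}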
2 changes: 0 additions & 2 deletions tests/samplers/test_beam_search.py
@@ -20,7 +20,6 @@
 MODELS = ["TinyLlama/TinyLlama-1.1B-Chat-v1.0"]
 
 
-@pytest.mark.skip_v1  # V1 engine does not yet support beam search
 @pytest.mark.parametrize("model", MODELS)
 @pytest.mark.parametrize("dtype", ["half"])
 @pytest.mark.parametrize("max_tokens", MAX_TOKENS)
@@ -62,7 +61,6 @@ def test_beam_search_single_input(
 )
 
 
-@pytest.mark.skip_v1  # V1 engine does not yet support beam search
 @pytest.mark.parametrize("model", MODELS)
 @pytest.mark.parametrize("dtype", ["half"])
 @pytest.mark.parametrize("max_tokens", MAX_TOKENS)
174 changes: 0 additions & 174 deletions tests/v1/engine/test_process_multi_modal_uuids.py

This file was deleted; its coverage moved to tests/renderers/test_process_multi_modal_uuids.py above.
