Merged
29 commits
7532a20
[Renderer] Move InputPreprocessor into Renderer (2/2)
DarkLight1337 Feb 14, 2026
f644cc2
mypy
DarkLight1337 Feb 14, 2026
3f9b557
Merge branch 'main' into mv-mm-processor-2
DarkLight1337 Feb 15, 2026
a438dc4
mypy
DarkLight1337 Feb 15, 2026
f26d529
Fix
DarkLight1337 Feb 15, 2026
98c7515
Update
DarkLight1337 Feb 15, 2026
4d89390
Fix imports
DarkLight1337 Feb 15, 2026
4054307
Remove duplicate prompt text extraction
DarkLight1337 Feb 15, 2026
a775af0
Fix
DarkLight1337 Feb 15, 2026
b1a5b7a
Redundant code
DarkLight1337 Feb 15, 2026
494ea05
Merge branch 'main' into mv-mm-processor-2
DarkLight1337 Feb 16, 2026
4eb4044
Remove skips
DarkLight1337 Feb 16, 2026
67c4e64
Fix beam search and factor out `_validate_and_run_requests`
DarkLight1337 Feb 16, 2026
f1e34ab
Fix audio replacement
DarkLight1337 Feb 16, 2026
ccc1551
Fix progress bar
DarkLight1337 Feb 16, 2026
1e60a00
Remove print
DarkLight1337 Feb 16, 2026
8b7e4f4
Fix?
DarkLight1337 Feb 16, 2026
c30e184
Improve log
DarkLight1337 Feb 16, 2026
80ef643
Simplify
DarkLight1337 Feb 16, 2026
7f79599
Fix
DarkLight1337 Feb 16, 2026
2b4fb63
mypy
DarkLight1337 Feb 16, 2026
e9c045c
Fix
DarkLight1337 Feb 16, 2026
e8b98b2
Fixes
DarkLight1337 Feb 16, 2026
98950e5
Fix UUIDs
DarkLight1337 Feb 16, 2026
209df9a
More fixes; move to renderer test
DarkLight1337 Feb 16, 2026
4d10352
Fix
DarkLight1337 Feb 17, 2026
eb2b8ea
Merge branch 'main' into mv-mm-processor-2
DarkLight1337 Feb 17, 2026
701a29e
Fix wrong request ID
DarkLight1337 Feb 17, 2026
b7d700b
Additional check
DarkLight1337 Feb 17, 2026
15 changes: 6 additions & 9 deletions tests/entrypoints/llm/test_chat.py
@@ -195,18 +195,15 @@ def test_chat_batch_failure_cleanup(llm_for_failure_test):
     valid_msg = [{"role": "user", "content": "Hello"}]
     long_text = "This is a very long text to test the error " * 50
     invalid_msg = [{"role": "user", "content": long_text}]
-    batch_1 = [
-        valid_msg,
-        valid_msg,
-        invalid_msg,
-    ]
-    batch_2 = [
-        valid_msg,
-        valid_msg,
-    ]
 
+    batch_1 = [valid_msg, valid_msg, invalid_msg]
+    batch_2 = [valid_msg, valid_msg]
     sampling_params = SamplingParams(temperature=0, max_tokens=10)
 
     with pytest.raises(ValueError, match="context length is only"):
         llm.chat(batch_1, sampling_params=sampling_params)
     assert llm.llm_engine.get_num_unfinished_requests() == 0
 
     outputs_2 = llm.chat(batch_2, sampling_params=sampling_params)
     assert len(outputs_2) == len(batch_2)
     assert llm.llm_engine.get_num_unfinished_requests() == 0
5 changes: 3 additions & 2 deletions tests/models/multimodal/processing/test_common.py
@@ -489,8 +489,9 @@ def _assert_inputs_equal(
     if ignore_mm_keys is None:
         ignore_mm_keys = set()
 
-    a_rest = {k: v for k, v in a.items() if k != "mm_kwargs"}
-    b_rest = {k: v for k, v in b.items() if k != "mm_kwargs"}
+    ignore_prompt_keys = ("prompt", "mm_kwargs")
+    a_rest = {k: v for k, v in a.items() if k not in ignore_prompt_keys}
+    b_rest = {k: v for k, v in b.items() if k not in ignore_prompt_keys}
 
     assert a_rest == b_rest, msg

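For reference, the change above compares processed inputs while skipping keys whose values are allowed to differ (the raw prompt text and the multi-modal kwargs). A minimal standalone sketch of the same filtered-comparison pattern, with a hypothetical helper name not taken from vLLM:

# Hypothetical sketch of the key-filtering comparison used in
# _assert_inputs_equal above; assert_equal_ignoring is an illustrative name.
def assert_equal_ignoring(a: dict, b: dict, ignored: tuple[str, ...]) -> None:
    a_rest = {k: v for k, v in a.items() if k not in ignored}
    b_rest = {k: v for k, v in b.items() if k not in ignored}
    assert a_rest == b_rest

# The prompts differ, but the comparison deliberately ignores them.
assert_equal_ignoring(
    {"prompt": "<image> hi", "prompt_token_ids": [1, 2]},
    {"prompt": "hi", "prompt_token_ids": [1, 2]},
    ignored=("prompt", "mm_kwargs"),
)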
165 changes: 165 additions & 0 deletions tests/renderers/test_process_multi_modal_uuids.py
@@ -0,0 +1,165 @@
# SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project

import pytest

from vllm.assets.image import ImageAsset
from vllm.assets.video import VideoAsset
from vllm.config import CacheConfig, ModelConfig, VllmConfig
from vllm.renderers.hf import HfRenderer
from vllm.tokenizers.registry import tokenizer_args_from_config

cherry_pil_image = ImageAsset("cherry_blossom").pil_image
stop_pil_image = ImageAsset("stop_sign").pil_image
baby_reading_np_ndarrays = VideoAsset("baby_reading").np_ndarrays


def _build_renderer(
    *, mm_cache_gb: float = 4.0, enable_prefix_caching: bool = True
) -> HfRenderer:
    model_config = ModelConfig(
        model="Qwen/Qwen2.5-VL-3B-Instruct",
        max_model_len=128,
        mm_processor_cache_gb=mm_cache_gb,
    )

    vllm_config = VllmConfig(
        model_config=model_config,
        cache_config=CacheConfig(enable_prefix_caching=enable_prefix_caching),
    )

    _, tokenizer_name, _, kwargs = tokenizer_args_from_config(model_config)

    return HfRenderer.from_config(
        vllm_config,
        tokenizer_kwargs={**kwargs, "tokenizer_name": tokenizer_name},
    )


def test_multi_modal_uuids_length_mismatch_raises():
    renderer = _build_renderer()

    mm_data = {"image": [cherry_pil_image, stop_pil_image]}

    # Mismatch: 2 items but only 1 uuid provided
    mm_uuids = {"image": ["hash_cherry"]}

    mm_processor = renderer.get_mm_processor()
    mm_items = mm_processor.info.parse_mm_data(mm_data)

    with pytest.raises(ValueError, match="must have same length as"):
        renderer._process_mm_uuids(mm_data, mm_items, mm_uuids, "req-1")


def test_multi_modal_uuids_missing_modality_raises():
    renderer = _build_renderer()

    mm_data = {
        "image": [cherry_pil_image],
        "video": None,
    }

    # Only image uuids are provided; the missing video entry should raise
    mm_uuids = {"image": ["hash_cherry"]}

    mm_processor = renderer.get_mm_processor()
    mm_items = mm_processor.info.parse_mm_data(mm_data)

    with pytest.raises(ValueError, match="is empty but .* is missing"):
        renderer._process_mm_uuids(mm_data, mm_items, mm_uuids, "req-2")


@pytest.mark.parametrize(
    "mm_cache_gb, enable_prefix_caching",
    [
        (4.0, True),  # default behavior
        (4.0, False),  # prefix caching disabled
        (0.0, True),  # processor cache disabled
    ],
)
def test_multi_modal_uuids_accepts_none_and_passes_through(
    monkeypatch, mm_cache_gb: float, enable_prefix_caching: bool
):
    renderer = _build_renderer(
        mm_cache_gb=mm_cache_gb,
        enable_prefix_caching=enable_prefix_caching,
    )

    mm_data = {
        "image": [cherry_pil_image, stop_pil_image],
        "video": baby_reading_np_ndarrays,
    }

    # Use a consistent two-image scenario across all configurations
    mm_uuids = {"image": [None, "hash_stop"], "video": None}

    mm_processor = renderer.get_mm_processor()
    mm_items = mm_processor.info.parse_mm_data(mm_data)
    processed_mm_uuids = renderer._process_mm_uuids(
        mm_data, mm_items, mm_uuids, "req-3"
    )

    assert processed_mm_uuids == mm_uuids


@pytest.mark.parametrize(
    "mm_cache_gb, enable_prefix_caching",
    [
        (4.0, True),  # default behavior
        (4.0, False),  # prefix caching disabled
        (0.0, True),  # processor cache disabled
    ],
)
def test_multi_modal_uuids_accepts_empty(
    monkeypatch, mm_cache_gb: float, enable_prefix_caching: bool
):
    renderer = _build_renderer(
        mm_cache_gb=mm_cache_gb,
        enable_prefix_caching=enable_prefix_caching,
    )

    # While None means a cached multi-modal input that requires UUIDs,
    # an empty list means no multi-modal input at all
    mm_data = {"image": [], "video": []}  # type: ignore[var-annotated]
    mm_uuids = {"image": [], "video": None}  # type: ignore[var-annotated]

    mm_processor = renderer.get_mm_processor()
    mm_items = mm_processor.info.parse_mm_data(mm_data)
    processed_mm_uuids = renderer._process_mm_uuids(
        mm_data, mm_items, mm_uuids, "req-4"
    )

    assert processed_mm_uuids == mm_uuids


def test_multi_modal_uuids_ignored_when_caching_disabled(monkeypatch):
    # When the processor cache is 0 and prefix caching is disabled, the
    # processor builds overrides from the request ID instead of using
    # the user-provided UUIDs.
    renderer = _build_renderer(mm_cache_gb=0.0, enable_prefix_caching=False)

    request_id = "req-42"
    mm_data = {
        "image": [cherry_pil_image, stop_pil_image],
        "video": baby_reading_np_ndarrays,
    }
    mm_uuids = {"image": ["hash_cherry", "hash_stop"], "video": ["hash_video"]}

    mm_processor = renderer.get_mm_processor()
    mm_items = mm_processor.info.parse_mm_data(mm_data)
    processed_mm_uuids = renderer._process_mm_uuids(
        mm_data, mm_items, mm_uuids, request_id
    )

    # Expect request-ID-based overrides to replace the user-provided UUIDs
    assert set(processed_mm_uuids.keys()) == {"image", "video"}
    assert len(processed_mm_uuids["image"]) == 2
    assert len(processed_mm_uuids["video"]) == 1
    assert processed_mm_uuids["image"][0].startswith(
        f"{request_id}-image-"
    ) and processed_mm_uuids["image"][0].endswith("-0")
    assert processed_mm_uuids["image"][1].startswith(
        f"{request_id}-image-"
    ) and processed_mm_uuids["image"][1].endswith("-1")
    assert processed_mm_uuids["video"][0].startswith(
        f"{request_id}-video-"
    ) and processed_mm_uuids["video"][0].endswith("-0")
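
The last test above depends on the renderer deriving UUID overrides from the request ID once both the processor cache and prefix caching are disabled. A minimal sketch of that derivation follows; the helper name and the exact "{request_id}-{modality}-{index}" format are assumptions inferred from the startswith/endswith assertions in the test, not vLLM's actual implementation:

from typing import Any

# Hypothetical sketch (not vLLM's real code): derive per-item UUID
# overrides from the request ID so that cache keys stay unique per request.
def build_request_id_overrides(
    mm_items: dict[str, list[Any]], request_id: str
) -> dict[str, list[str]]:
    return {
        modality: [f"{request_id}-{modality}-{i}" for i in range(len(items))]
        for modality, items in mm_items.items()
    }

# Example: two images and one video for "req-42" would yield
# {"image": ["req-42-image-0", "req-42-image-1"], "video": ["req-42-video-0"]}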
2 changes: 0 additions & 2 deletions tests/samplers/test_beam_search.py
@@ -20,7 +20,6 @@
 MODELS = ["TinyLlama/TinyLlama-1.1B-Chat-v1.0"]
 
 
-@pytest.mark.skip_v1  # V1 engine does not yet support beam search
 @pytest.mark.parametrize("model", MODELS)
 @pytest.mark.parametrize("dtype", ["half"])
 @pytest.mark.parametrize("max_tokens", MAX_TOKENS)
@@ -62,7 +61,6 @@ def test_beam_search_single_input(
 )
 
 
-@pytest.mark.skip_v1  # V1 engine does not yet support beam search
 @pytest.mark.parametrize("model", MODELS)
 @pytest.mark.parametrize("dtype", ["half"])
 @pytest.mark.parametrize("max_tokens", MAX_TOKENS)
174 changes: 0 additions & 174 deletions tests/v1/engine/test_process_multi_modal_uuids.py

This file was deleted; its coverage moved to tests/renderers/test_process_multi_modal_uuids.py above.
