vllm-project · YunzhuLu · May 7, 2026 · May 7, 2026
diff --git a/docs/design/cuda_graphs_multimodal.md b/docs/design/cuda_graphs_multimodal.md
@@ -87,6 +87,7 @@ Models opt-in to encoder CUDA Graphs by implementing the [SupportsEncoderCudaGra
 | ------------ | ------ | ------------ | ------------ |
 | `Qwen3VLForConditionalGeneration` | `Qwen3-VL` | ✅︎ | ✅︎ |
 | `Qwen2_5_VLForConditionalGeneration` | `Qwen2.5-VL` | ✅︎ | ✅︎ |
+| `MiniCPMV` | `MiniCPMV2.5`, `MiniCPMV2.6`, `MiniCPMV4.0`, `MiniCPMV4.5` | ✅︎ | ✅︎ |
 
 !!! note
     Encoder CUDA Graphs have currently been tested with `--mm-encoder-attn-backend=FLASH_ATTN` and `--mm-encoder-attn-backend=FLASHINFER` on Blackwell GPUs.

diff --git a/examples/generate/multimodal/vision_language_offline.py b/examples/generate/multimodal/vision_language_offline.py
@@ -2467,6 +2467,7 @@ def run_tarsier2(questions: list[str], modality: str) -> ModelRequestData:
     "qwen3_vl",
     "qwen3_vl_moe",
     "qwen2_5_vl",
+    "minicpmv",
 ]
 
 

@@ -41,6 +41,20 @@ def qwen_vl_chat_template(content: str) -> str:
     return f"<|im_start|>user\n{content}<|im_end|>\n<|im_start|>assistant\n"
 
 
+def minicpmv_25_chat_template(content: str) -> str:
+    """Build the Llama3-style single-turn prompt used by MiniCPM-V 2.5."""
+    # Opening of the user turn, up to the point where the message text goes.
+    user_header = "<|begin_of_text|><|start_header_id|>user<|end_header_id|>\n\n"
+    # Ends the user turn and leaves the prompt sitting at the assistant header.
+    assistant_header = "<|eot_id|><|start_header_id|>assistant<|end_header_id|>\n\n"
+    return f"{user_header}{content}{assistant_header}"
+
+
+def minicpmv_chat_template(content: str) -> str:
+    """Build the ChatML single-turn prompt used by MiniCPM-V 2.6 / 4.0 / 4.5."""
+    return "<|im_start|>user\n" + f"{content}" + "<|im_end|>\n<|im_start|>assistant\n"
+
+
 MODEL_CONFIGS: dict[str, VitCudagraphTestConfig] = {
     "qwen3_vl": VitCudagraphTestConfig(
         model="Qwen/Qwen3-VL-2B-Instruct",
@@ -66,6 +80,48 @@ def qwen_vl_chat_template(content: str) -> str:
         needs_video_metadata=False,
         marks=[pytest.mark.core_model],
     ),
+    "minicpmv_25": VitCudagraphTestConfig(
+        model="openbmb/MiniCPM-Llama3-V-2_5",
+        modalities=["image"],
+        image_prompt=minicpmv_25_chat_template(
+            "(<image>./</image>)\nWhat is in this image?"
+        ),
+        vllm_runner_kwargs={"trust_remote_code": True},
+        marks=[pytest.mark.core_model],
+    ),
+    "minicpmv_26": VitCudagraphTestConfig(
+        model="openbmb/MiniCPM-V-2_6",
+        image_prompt=minicpmv_chat_template(
+            "(<image>./</image>)\nWhat is in this image?"
+        ),
+        video_prompt=minicpmv_chat_template(
+            "(<video>./</video>)\nDescribe this video in one sentence."
+        ),
+        vllm_runner_kwargs={"trust_remote_code": True},
+        marks=[pytest.mark.core_model],
+    ),
+    "minicpmv_40": VitCudagraphTestConfig(
+        model="openbmb/MiniCPM-V-4",
+        image_prompt=minicpmv_chat_template(
+            "(<image>./</image>)\nWhat is in this image?"
+        ),
+        video_prompt=minicpmv_chat_template(
+            "(<video>./</video>)\nDescribe this video in one sentence."
+        ),
+        vllm_runner_kwargs={"trust_remote_code": True},
+        marks=[pytest.mark.core_model],
+    ),
+    "minicpmv_45": VitCudagraphTestConfig(
+        model="openbmb/MiniCPM-V-4_5",
+        image_prompt=minicpmv_chat_template(
+            "(<image>./</image>)\nWhat is in this image?"
+        ),
+        video_prompt=minicpmv_chat_template(
+            "(<video>./</video>)\nDescribe this video in one sentence."
+        ),
+        vllm_runner_kwargs={"trust_remote_code": True},
+        marks=[pytest.mark.core_model],
+    ),
 }