vllm-project · grYe99 · Apr 21, 2026 · Apr 25, 2026 · Apr 25, 2026 · Apr 25, 2026
diff --git a/docs/design/cuda_graphs_multimodal.md b/docs/design/cuda_graphs_multimodal.md
@@ -87,6 +87,7 @@ Models opt-in to encoder CUDA Graphs by implementing the [SupportsEncoderCudaGra
 | ------------ | ------ | ------------ | ------------ |
 | `Qwen3VLForConditionalGeneration` | `Qwen3-VL` | ✅︎ | ✅︎ |
 | `Qwen2_5_VLForConditionalGeneration` | `Qwen2.5-VL` | ✅︎ | ✅︎ |
+| `Glm4vForConditionalGeneration` | `GLM-4.1V`, `GLM-4.6V-Flash` | ✅︎ | ✅︎ |
 
 !!! note
     Encoder CUDA Graphs have currently been tested with `--mm-encoder-attn-backend=FLASH_ATTN` and `--mm-encoder-attn-backend=FLASHINFER` on Blackwell GPUs.

diff --git a/examples/generate/multimodal/vision_language_offline.py b/examples/generate/multimodal/vision_language_offline.py
@@ -588,7 +588,6 @@ def run_glm4_1v(questions: list[str], modality: str) -> ModelRequestData:
             "fps": 1,
         },
         limit_mm_per_prompt=mm_limit,
-        enforce_eager=True,
     )
 
     image_placeholder = "<|begin_of_image|><|image|><|end_of_image|>"
@@ -2467,6 +2466,7 @@ def run_tarsier2(questions: list[str], modality: str) -> ModelRequestData:
     "qwen3_vl",
     "qwen3_vl_moe",
     "qwen2_5_vl",
+    "glm4_1v",
 ]
 
 

@@ -66,6 +66,21 @@ def qwen_vl_chat_template(content: str) -> str:
         needs_video_metadata=False,
         marks=[pytest.mark.core_model],
     ),
+    "glm4_1v": VitCudagraphTestConfig(
+        model="zai-org/GLM-4.1V-9B-Thinking",
+        image_prompt=(
+            "[gMASK]<sop><|system|>\nYou are a helpful assistant.<|user|>\n"
+            "<|begin_of_image|><|image|><|end_of_image|>"
+            "What is in this image?<|assistant|>assistant\n"
+        ),
+        video_prompt=(
+            "[gMASK]<sop><|system|>\nYou are a helpful assistant.<|user|>\n"
+            "<|begin_of_video|><|video|><|end_of_video|>"
+            "Describe this video in one sentence<|assistant|>assistant\n"
+        ),
+        needs_video_metadata=True,
+        marks=[pytest.mark.core_model],
+    ),
 }