Skip to content

Commit eb90a71

Browse files
committed
Address comments and fix test
Signed-off-by: Chang Liu (Enterprise Products) <[email protected]>
1 parent 19e70fb commit eb90a71

File tree

2 files changed

+22
-6
lines changed

2 files changed

+22
-6
lines changed

tensorrt_llm/disaggregated_params.py

Lines changed: 13 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -1,6 +1,8 @@
11
from dataclasses import dataclass
22
from typing import Any, Dict, List, Optional
33

4+
import numpy as np
5+
46
# isort: off
57
# needed before trying to import bindings to load tensorrt_libs
68
import tensorrt as trt # noqa
@@ -58,6 +60,17 @@ def get_request_type(self) -> tllme.RequestType:
5860
)
5961

6062
def __post_init__(self):
63+
if self.request_type is not None:
64+
self.request_type = self.request_type.lower()
65+
if self.request_type not in [
66+
"context_only",
67+
"generation_only",
68+
"context_and_generation",
69+
]:
70+
raise ValueError(
71+
f"Unknown request type: {self.request_type}. Must be context_only, generation_only or "
72+
"context_and_generation"
73+
)
6174
if self.multimodal_embedding_handles is not None:
6275
if self.multimodal_hashes is not None:
6376
# if mm hashes are provided, kvcache reuse can be enabled
@@ -69,8 +82,6 @@ def __post_init__(self):
6982
assert len(mm_hash) == 8, "mm_hash must be a list of 8 integers"
7083
assert all(isinstance(x, int) for x in mm_hash), "mm_hash must contain integers"
7184
else:
72-
import numpy as np
73-
7485
# if user did not provide mm embedding handles, kvcache reuse will be disabled
7586
assert len(self.multimodal_embedding_handles) > 0, (
7687
"multimodal_embedding_handles must be provided"

tests/unittest/_torch/multimodal/test_mm_encoder_standalone.py

Lines changed: 9 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -1,17 +1,21 @@
11
import json
22
import os
3+
from pathlib import Path
34

45
import pytest
6+
from utils.llm_data import llm_models_root
57

68
from tensorrt_llm import MultimodalEncoder
79
from tensorrt_llm.inputs import default_multimodal_input_loader
810
from tensorrt_llm.llmapi import KvCacheConfig
911
from tensorrt_llm.llmapi.llm import LLM, SamplingParams
1012

13+
test_data_root = Path(
14+
os.path.join(llm_models_root(), "multimodals", "test_data"))
1115
example_images = [
12-
"https://huggingface.co/datasets/YiYiXu/testing-images/resolve/main/seashore.png",
13-
"https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/diffusers/inpaint.png",
14-
"https://huggingface.co/datasets/Sayali9141/traffic_signal_images/resolve/main/61.jpg",
16+
str(test_data_root / "seashore.png"),
17+
str(test_data_root / "inpaint.png"),
18+
str(test_data_root / "61.jpg"),
1519
]
1620

1721

@@ -184,7 +188,8 @@ def test_multi_request_batch_chat(model_key, multimodal_model_config):
184188

185189
sampling_params = SamplingParams(max_tokens=max_tokens)
186190
kv_cache_config = KvCacheConfig(
187-
enable_block_reuse=True,
191+
enable_block_reuse=
192+
False, # Disable block reuse for output 1-1 matching check
188193
free_gpu_memory_fraction=free_gpu_memory_fraction,
189194
)
190195

0 commit comments

Comments (0)