chore: Define new get_max_placeholder_tokens

milesial · milesial · commit 8da66450421d · 2025-07-29T09:59:18.000-07:00
Signed-off-by: Alexandre Milesi <30204471+milesial@users.noreply.github.com>
diff --git a/vllm/multimodal/profiling.py b/vllm/multimodal/profiling.py
@@ -180,11 +180,14 @@ def _get_dummy_mm_inputs(
     def _get_mm_num_tokens(
         self,
         mm_inputs: MultiModalInputs,
+        mm_embeddings_only: bool = True,
     ) -> Mapping[str, int]:
         placeholders_by_modality = mm_inputs["mm_placeholders"]
 
         return {
-            modality: sum(item.length for item in placeholders)
+            modality:
+            sum(item.get_num_embeds() if mm_embeddings_only else item.length
+                for item in placeholders)
             for modality, placeholders in placeholders_by_modality.items()
         }
 
@@ -257,6 +260,7 @@ def get_mm_max_tokens(
         self,
         seq_len: int,
         mm_counts: Optional[Mapping[str, int]] = None,
+        mm_embeddings_only: bool = True,
     ) -> Mapping[str, int]:
         if mm_counts is None:
             mm_counts = self.get_mm_limits()
@@ -285,4 +289,14 @@ def get_mm_max_tokens(
             return max_tokens_per_item
 
         mm_inputs = self._get_dummy_mm_inputs(seq_len, mm_counts)
-        return self._get_mm_num_tokens(mm_inputs)
+        return self._get_mm_num_tokens(mm_inputs,
+                                       mm_embeddings_only=mm_embeddings_only)
+
+    def get_max_placeholder_tokens(
+        self,
+        seq_len: int,
+        mm_counts: Optional[Mapping[str, int]] = None,
+    ):
+        return self.get_mm_max_tokens(seq_len,
+                                      mm_counts,
+                                      mm_embeddings_only=False)
diff --git a/vllm/multimodal/registry.py b/vllm/multimodal/registry.py
@@ -129,7 +129,7 @@ def get_max_tokens_per_item_by_modality(
         seq_len = model_config.max_model_len
         mm_limits = self.get_mm_limits_per_prompt(model_config)
 
-        return profiler.get_mm_max_tokens(
+        return profiler.get_max_placeholder_tokens(
             seq_len,
             {
                 modality: 1

Original file line number	Diff line number	Diff line change
`@@ -129,7 +129,7 @@ def get_max_tokens_per_item_by_modality(`
`129`	`129`	`seq_len = model_config.max_model_len`
`130`	`130`	`mm_limits = self.get_mm_limits_per_prompt(model_config)`
`131`	`131`
`132`		`- return profiler.get_mm_max_tokens(`
	`132`	`+ return profiler.get_max_placeholder_tokens(`
`133`	`133`	`seq_len,`
`134`	`134`	`{`
`135`	`135`	`modality: 1`