huggingface · qgallouedec · Apr 17, 2026 · Apr 15, 2026 · Apr 15, 2026 · Apr 16, 2026
diff --git a/tests/test_chat_template_utils.py b/tests/test_chat_template_utils.py
@@ -18,12 +18,7 @@
 import pytest
 import transformers
 from packaging.version import Version
-from transformers import (
-    AutoModelForCausalLM,
-    AutoModelForSequenceClassification,
-    AutoProcessor,
-    AutoTokenizer,
-)
+from transformers import AutoModelForCausalLM, AutoModelForSequenceClassification, AutoProcessor, AutoTokenizer
 
 from trl import clone_chat_template
 from trl.chat_template_utils import (
@@ -35,7 +30,7 @@
 )
 from trl.data_utils import prepare_multimodal_messages
 
-from .testing_utils import TrlTestCase, require_jmespath
+from .testing_utils import TrlTestCase, require_jmespath, require_vision
 
 
 class TestCloneChatTemplate(TrlTestCase):
@@ -346,6 +341,70 @@ def test_non_prefix_preserving_template(self):
         {%- endif %}""")
         assert is_chat_template_prefix_preserving(tokenizer) is False
 
+    @require_vision
+    def test_prefix_preserving_template_processor(self):
+        processor = AutoProcessor.from_pretrained("trl-internal-testing/tiny-Qwen3VLForConditionalGeneration")
+        # Simple prefix-preserving template that mirrors how Qwen-VL templates emit image tokens: the list-of-blocks
+        # content is iterated, and `{"type": "image"}` blocks are rendered as `<|vision_start|><|image_pad|><|vision_end|>`.
+        # docstyle-ignore
+        processor.chat_template = textwrap.dedent(r"""
+        {%- for message in messages %}
+
+        {%- if message.role == 'user' %}
+            {{- '<|im_start|>user\n' }}
+            {%- if message.content is string %}
+                {{- message.content }}
+            {%- else %}
+                {%- for content in message.content %}
+                    {%- if content.type == 'image' or 'image' in content %}
+                        {{- '<|vision_start|><|image_pad|><|vision_end|>' }}
+                    {%- elif 'text' in content %}
+                        {{- content.text }}
+                    {%- endif %}
+                {%- endfor %}
+            {%- endif %}
+            {{- '<|im_end|>\n' }}
+        {%- elif message.role == 'assistant' %}
+            {{- '<|im_start|>assistant\n' }}
+            {%- if message.content is string %}
+                {{- message.content }}
+            {%- else %}
+                {%- for content in message.content %}
+                    {%- if 'text' in content %}
+                        {{- content.text }}
+                    {%- endif %}
+                {%- endfor %}
+            {%- endif %}
+            {%- if message.tool_calls %}
+                {%- for tool_call in message.tool_calls %}
+                    {%- if tool_call.function %}
+                        {%- set tool_call = tool_call.function %}
+                    {%- endif %}
+                    {{- '<tool_call>' + tool_call.name + '</tool_call>' }}
+                {%- endfor %}
+            {%- endif %}
+            {{- '<|im_end|>\n' }}
+        {%- elif message.role == 'tool' %}
+            {{- '<|im_start|>tool\n' }}
+            {%- if message.content is string %}
+                {{- message.content }}
+            {%- else %}
+                {%- for content in message.content %}
+                    {%- if 'text' in content %}
+                        {{- content.text }}
+                    {%- endif %}
+                {%- endfor %}
+            {%- endif %}
+            {{- '<|im_end|>\n' }}
+        {%- endif %}
+
+        {%- endfor %}
+
+        {%- if add_generation_prompt %}
+            {{- '<|im_start|>assistant\n' }}
+        {%- endif %}""")
+        assert is_chat_template_prefix_preserving(processor) is True
+
 
 @pytest.mark.parametrize(
     "tokenizer_name",

diff --git a/trl/chat_template_utils.py b/trl/chat_template_utils.py
@@ -450,7 +450,7 @@ def supports_tool_calling(processing_class) -> bool:
     return all(s in rendered for s in (_name_sentinel, _arg_key_sentinel, _arg_val_sentinel, _content_sentinel))
 
 
-def is_chat_template_prefix_preserving(tokenizer: PreTrainedTokenizer) -> bool:
+def is_chat_template_prefix_preserving(processing_class: PreTrainedTokenizer | ProcessorMixin) -> bool:
     """
     Check whether the chat template preserves prefixes when applied.
 
@@ -459,8 +459,8 @@ def is_chat_template_prefix_preserving(tokenizer: PreTrainedTokenizer) -> bool:
     tokenizations with and without tool messages appended.
 
     Args:
-        tokenizer (`PreTrainedTokenizer`):
-            Tokenizer instance to check.
+        processing_class (`PreTrainedTokenizer` or `ProcessorMixin`):
+            Tokenizer or processor instance to check.
 
     Returns:
         `bool`:
@@ -477,18 +477,26 @@ def is_chat_template_prefix_preserving(tokenizer: PreTrainedTokenizer) -> bool:
         {"role": "assistant", "content": "", "tool_calls": dummy_tool_calls},
         {"role": "tool", "name": "dummy", "content": "dummy"},
     ]
+    # VLM processors expect structured list-of-blocks content, and image-token expansion only kicks in when an image
+    # is actually present, so include a dummy image to exercise the real code path.
+    if isinstance(processing_class, ProcessorMixin):
+        from PIL import Image
+
+        dummy_image = Image.new("RGB", (8, 8))
+        messages1 = prepare_multimodal_messages(messages1, images=[dummy_image])
+        messages2 = prepare_multimodal_messages(messages2, images=[dummy_image])
 
     try:
-        text1 = tokenizer.apply_chat_template(messages1, tokenize=False)
-        text2 = tokenizer.apply_chat_template(messages2, tokenize=False, add_generation_prompt=True)
+        text1 = processing_class.apply_chat_template(messages1, tokenize=False)
+        text2 = processing_class.apply_chat_template(messages2, tokenize=False, add_generation_prompt=True)
     except TypeError:
         # Best-effort fallback for templates that reject dict args (e.g. DeepSeek-V3). This is a chat template
         # bug (see transformers#45419), and the training chat template fixes it to avoid blocking users.
         dummy_tool_calls = [{"type": "function", "function": {"name": "dummy", "arguments": "{}"}}]
         messages1[1]["tool_calls"] = dummy_tool_calls
         messages2[1]["tool_calls"] = dummy_tool_calls
-        text1 = tokenizer.apply_chat_template(messages1, tokenize=False)
-        text2 = tokenizer.apply_chat_template(messages2, tokenize=False, add_generation_prompt=True)
+        text1 = processing_class.apply_chat_template(messages1, tokenize=False)
+        text2 = processing_class.apply_chat_template(messages2, tokenize=False, add_generation_prompt=True)
 
     return text2.startswith(text1)