From d10bc1c6a052f8263dbf059874348d972b290f71 Mon Sep 17 00:00:00 2001
From: Matt <rocketknight1@gmail.com>
Date: Fri, 10 May 2024 15:09:40 +0100
Subject: [PATCH 01/12] Let's try moving chat templates out of IDEFICS and into
 the generic ProcessorMixin

---
 .../models/idefics2/processing_idefics2.py    | 48 +----------------
 src/transformers/processing_utils.py          | 51 ++++++++++++++++++-
 2 files changed, 50 insertions(+), 49 deletions(-)
diff --git a/src/transformers/models/idefics2/processing_idefics2.py b/src/transformers/models/idefics2/processing_idefics2.py
index e9f9f9233736..447068607ec3 100644
--- a/src/transformers/models/idefics2/processing_idefics2.py
+++ b/src/transformers/models/idefics2/processing_idefics2.py
@@ -16,7 +16,7 @@
 Processor class for IDEFICS2.
 """
 
-from typing import TYPE_CHECKING, Dict, List, Optional, Union
+from typing import TYPE_CHECKING, List, Optional, Union
 
 from ...feature_extraction_utils import BatchFeature
 from ...image_utils import ImageInput, is_valid_image, load_image
@@ -78,9 +78,6 @@ def __init__(self, image_processor, tokenizer=None, image_seq_len: int = 64, **k
         }
         tokenizer.add_special_tokens(tokens_to_add)
 
-        # Stores a Jinja template that formats chat histories into tokenizable strings
-        self.chat_template = kwargs.pop("chat_template", None)
-
         super().__init__(image_processor, tokenizer)
 
     def _extract_images_from_prompts(self, prompts):
@@ -252,49 +249,6 @@ def model_input_names(self):
         image_processor_input_names = self.image_processor.model_input_names
         return list(dict.fromkeys(tokenizer_input_names + image_processor_input_names))
 
-    def apply_chat_template(
-        self,
-        conversation: Union[List[Dict[str, str]]],
-        chat_template: Optional[str] = None,
-        tokenize: bool = False,
-        **kwargs,
-    ) -> str:
-        """
-        Overrides the tokenizer's `apply_chat_template` method to apply the IDEFICS2 chat template by default
-        if no chat template is provided.
-
-        By default, the output isn't tokenized. This is because the IDEFICS2 chat template is designed to insert
-        the image token <image> into the sequence according to the message, but does not handle expanding the image
-        tokens to the sequence length or adding the surrounding tokens e.g. <fake_image_token>.
-
-        Args:
-            conversation (`Union[List[Dict, str, str]]`):
-                The conversation to format.
-            chat_template (`Optional[str]`, *optional*):
-                The Jinja template to use for formatting the conversation. If not provided, the default chat template
-                is used.
-            tokenize (`bool`, *optional*, defaults to `False`):
-                Whether to tokenize the output or not.
-            **kwargs:
-                Additional keyword arguments for the tokenizer's `apply_chat_template` method.
-        """
-
-        if chat_template is None:
-            if self.chat_template is not None:
-                chat_template = self.chat_template
-            else:
-                logger.warning_once(
-                    "No chat template is set for this processor, falling back to a default class-level template. This is "
-                    "very error-prone, because models are often trained with templates different from the class default! "
-                    "Default chat templates are a legacy feature and will be removed in Transformers v4.43, at which "
-                    "point any code depending on them will stop working. We recommend setting a valid chat template before "
-                    "then to ensure that this model continues working without issues."
-                )
-                chat_template = self.default_chat_template
-        return self.tokenizer.apply_chat_template(
-            conversation, chat_template=chat_template, tokenize=tokenize, **kwargs
-        )
-
     @property
     def default_chat_template(self):
         """
diff --git a/src/transformers/processing_utils.py b/src/transformers/processing_utils.py
index d76fa4dccccf..3f707f4d75e7 100644
--- a/src/transformers/processing_utils.py
+++ b/src/transformers/processing_utils.py
@@ -22,7 +22,7 @@
 import os
 import warnings
 from pathlib import Path
-from typing import Any, Dict, Optional, Tuple, Union
+from typing import TYPE_CHECKING, Any, Dict, List, Optional, Tuple, Union
 
 from .dynamic_module_utils import custom_object_save
 from .tokenization_utils_base import PreTrainedTokenizerBase
@@ -41,6 +41,10 @@
 )
 
 
+if TYPE_CHECKING:
+    from .pipelines.conversational import Conversation
+
+
 logger = logging.get_logger(__name__)
 
 # Dynamically import the Transformers module to grab the attribute classes of the processor form their names.
@@ -59,7 +63,7 @@ class ProcessorMixin(PushToHubMixin):
     This is a mixin used to provide saving/loading functionality for all processor classes.
     """
 
-    attributes = ["feature_extractor", "tokenizer"]
+    attributes = ["feature_extractor", "tokenizer", "chat_template"]
     # Names need to be attr_class for attr in attributes
     feature_extractor_class = None
     tokenizer_class = None
@@ -522,6 +526,49 @@ def model_input_names(self):
         first_attribute = getattr(self, self.attributes[0])
         return getattr(first_attribute, "model_input_names", None)
 
+    def apply_chat_template(
+        self,
+        conversation: Union[List[Dict[str, str]], "Conversation"],
+        chat_template: Optional[str] = None,
+        tokenize: bool = False,
+        **kwargs,
+    ) -> str:
+        """
+        Overrides the tokenizer's `apply_chat_template` method to apply the IDEFICS2 chat template by default
+        if no chat template is provided.
+
+        By default, the output isn't tokenized. This is because the IDEFICS2 chat template is designed to insert
+        the image token <image> into the sequence according to the message, but does not handle expanding the image
+        tokens to the sequence length or adding the surrounding tokens e.g. <fake_image_token>.
+
+        Args:
+            conversation (`Union[List[Dict, str, str], "Conversation"]`):
+                The conversation to format.
+            chat_template (`Optional[str]`, *optional*):
+                The Jinja template to use for formatting the conversation. If not provided, the default chat template
+                is used.
+            tokenize (`bool`, *optional*, defaults to `False`):
+                Whether to tokenize the output or not.
+            **kwargs:
+                Additional keyword arguments for the tokenizer's `apply_chat_template` method.
+        """
+
+        if chat_template is None:
+            if self.chat_template is not None:
+                chat_template = self.chat_template
+            else:
+                logger.warning_once(
+                    "No chat template is set for this processor, falling back to a default class-level template. This is "
+                    "very error-prone, because models are often trained with templates different from the class default! "
+                    "Default chat templates are a legacy feature and will be removed in Transformers v4.43, at which "
+                    "point any code depending on them will stop working. We recommend setting a valid chat template before "
+                    "then to ensure that this model continues working without issues."
+                )
+                chat_template = self.default_chat_template
+        return self.tokenizer.apply_chat_template(
+            conversation, chat_template=chat_template, tokenize=tokenize, **kwargs
+        )
+
 
 ProcessorMixin.push_to_hub = copy_func(ProcessorMixin.push_to_hub)
 if ProcessorMixin.push_to_hub.__doc__ is not None:

From 8fa0e8cb1632a1b3ecebfefd53b24bd554df0d6b Mon Sep 17 00:00:00 2001
From: Matt <rocketknight1@gmail.com>
Date: Fri, 10 May 2024 15:15:20 +0100
Subject: [PATCH 02/12] Chat templates should not be mandatory

---
 src/transformers/processing_utils.py | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/src/transformers/processing_utils.py b/src/transformers/processing_utils.py
index 3f707f4d75e7..3494908210c3 100644
--- a/src/transformers/processing_utils.py
+++ b/src/transformers/processing_utils.py
@@ -63,7 +63,8 @@ class ProcessorMixin(PushToHubMixin):
     This is a mixin used to provide saving/loading functionality for all processor classes.
     """
 
-    attributes = ["feature_extractor", "tokenizer", "chat_template"]
+    attributes = ["feature_extractor", "tokenizer"]
+    optional_attributes = ["chat_template"]
     # Names need to be attr_class for attr in attributes
     feature_extractor_class = None
     tokenizer_class = None
@@ -71,6 +72,9 @@ class ProcessorMixin(PushToHubMixin):
 
     # args have to match the attributes class attribute
     def __init__(self, *args, **kwargs):
+        # First, extract optional attributes from kwargs if present
+        for optional_attribute in self.optional_attributes:
+            setattr(self, optional_attribute, kwargs.pop(optional_attribute, None))
         # Sanitize args and kwargs
         for key in kwargs:
             if key not in self.attributes:

From 445c77ff7cd3406456b66c56bf8845b53e07bf1b Mon Sep 17 00:00:00 2001
From: Matt <rocketknight1@gmail.com>
Date: Fri, 10 May 2024 15:16:06 +0100
Subject: [PATCH 03/12] Chat templates should not be mandatory

---
 src/transformers/processing_utils.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/src/transformers/processing_utils.py b/src/transformers/processing_utils.py
index 3494908210c3..9b30d08be075 100644
--- a/src/transformers/processing_utils.py
+++ b/src/transformers/processing_utils.py
@@ -73,6 +73,7 @@ class ProcessorMixin(PushToHubMixin):
     # args have to match the attributes class attribute
     def __init__(self, *args, **kwargs):
         # First, extract optional attributes from kwargs if present
+        # Optional attributes can never be positional arguments
         for optional_attribute in self.optional_attributes:
             setattr(self, optional_attribute, kwargs.pop(optional_attribute, None))
         # Sanitize args and kwargs

From a16b53a2d18ed22b04acaf3a07dd58cb366555b1 Mon Sep 17 00:00:00 2001
From: Matt <rocketknight1@gmail.com>
Date: Fri, 10 May 2024 15:33:01 +0100
Subject: [PATCH 04/12] Not all classes will have default chat templates

---
 src/transformers/processing_utils.py | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/src/transformers/processing_utils.py b/src/transformers/processing_utils.py
index 9b30d08be075..e7d8ee5ee465 100644
--- a/src/transformers/processing_utils.py
+++ b/src/transformers/processing_utils.py
@@ -561,7 +561,7 @@ def apply_chat_template(
         if chat_template is None:
             if self.chat_template is not None:
                 chat_template = self.chat_template
-            else:
+            elif getattr(self, "default_chat_template", None) is not None:
                 logger.warning_once(
                     "No chat template is set for this processor, falling back to a default class-level template. This is "
                     "very error-prone, because models are often trained with templates different from the class default! "
@@ -570,6 +570,12 @@ def apply_chat_template(
                     "then to ensure that this model continues working without issues."
                 )
                 chat_template = self.default_chat_template
+            else:
+                raise ValueError(
+                    "No chat template is set for this processor. Please either set the `chat_template` attribute, "
+                    "or provide a chat template as an argument. See "
+                    "https://huggingface.co/docs/transformers/main/en/chat_templating for more information."
+                )
         return self.tokenizer.apply_chat_template(
             conversation, chat_template=chat_template, tokenize=tokenize, **kwargs
         )

From ce831e6ceb7594182037cf52150456ad9fd9c451 Mon Sep 17 00:00:00 2001
From: Matt <rocketknight1@gmail.com>
Date: Mon, 13 May 2024 16:53:01 +0100
Subject: [PATCH 05/12] stash commit

---
 src/transformers/models/idefics2/processing_idefics2.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/src/transformers/models/idefics2/processing_idefics2.py b/src/transformers/models/idefics2/processing_idefics2.py
index 447068607ec3..c87e3f119436 100644
--- a/src/transformers/models/idefics2/processing_idefics2.py
+++ b/src/transformers/models/idefics2/processing_idefics2.py
@@ -62,7 +62,7 @@ class Idefics2Processor(ProcessorMixin):
     image_processor_class = "Idefics2ImageProcessor"
     tokenizer_class = "AutoTokenizer"
 
-    def __init__(self, image_processor, tokenizer=None, image_seq_len: int = 64, **kwargs):
+    def __init__(self, image_processor, tokenizer=None, image_seq_len: int = 64, chat_template: str = None, **kwargs):
         if image_processor is None:
             raise ValueError("You need to specify an `image_processor`.")
         if tokenizer is None:
@@ -72,6 +72,7 @@ def __init__(self, image_processor, tokenizer=None, image_seq_len: int = 64, **k
         self.image_token = AddedToken("<image>", normalized=False, special=True)
         self.end_of_utterance_token = AddedToken("<end_of_utterance>", normalized=False, special=True)
         self.image_seq_len = image_seq_len
+        self.chat_template = chat_template
 
         tokens_to_add = {
             "additional_special_tokens": [self.fake_image_token, self.image_token, self.end_of_utterance_token]

From ca685a27e8b7b5207e7bfec1194b92acf1cf78f8 Mon Sep 17 00:00:00 2001
From: Matt <rocketknight1@gmail.com>
Date: Fri, 7 Jun 2024 17:16:54 +0100
Subject: [PATCH 06/12] Add chat template docstring

---
 src/transformers/models/idefics2/processing_idefics2.py | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/src/transformers/models/idefics2/processing_idefics2.py b/src/transformers/models/idefics2/processing_idefics2.py
index c87e3f119436..ef48ff050e1e 100644
--- a/src/transformers/models/idefics2/processing_idefics2.py
+++ b/src/transformers/models/idefics2/processing_idefics2.py
@@ -56,6 +56,9 @@ class Idefics2Processor(ProcessorMixin):
             The length of the image sequence i.e. the number of <image> tokens per image in the input.
             This parameter is used to build the string from the input prompt and image tokens and should match the
             config.perceiver_config.resampler_n_latents value for the model used.
+        chat_template (`str`, *optional*): A Jinja template which will be used to convert lists of messages
+            in a chat into a tokenizable string. This argument is optional, and only relevant to processors that support
+            chat inputs.
     """
 
     attributes = ["image_processor", "tokenizer"]

From 804636e721b0bfb16a2ffa46b1ada04a273b6626 Mon Sep 17 00:00:00 2001
From: Matt <rocketknight1@gmail.com>
Date: Fri, 7 Jun 2024 17:35:34 +0100
Subject: [PATCH 07/12] Clean up docstring

---
 src/transformers/processing_utils.py | 12 ++++--------
 1 file changed, 4 insertions(+), 8 deletions(-)

diff --git a/src/transformers/processing_utils.py b/src/transformers/processing_utils.py
index e7d8ee5ee465..ce18b7de1f48 100644
--- a/src/transformers/processing_utils.py
+++ b/src/transformers/processing_utils.py
@@ -539,15 +539,11 @@ def apply_chat_template(
         **kwargs,
     ) -> str:
         """
-        Overrides the tokenizer's `apply_chat_template` method to apply the IDEFICS2 chat template by default
-        if no chat template is provided.
-
-        By default, the output isn't tokenized. This is because the IDEFICS2 chat template is designed to insert
-        the image token <image> into the sequence according to the message, but does not handle expanding the image
-        tokens to the sequence length or adding the surrounding tokens e.g. <fake_image_token>.
+        Similar to the `apply_chat_template` method on tokenizers, this method applies a Jinja template to input
+        conversations to turn them into a single tokenizable string.
 
         Args:
-            conversation (`Union[List[Dict, str, str], "Conversation"]`):
+            conversation (`List[Dict, str, str]`):
                 The conversation to format.
             chat_template (`Optional[str]`, *optional*):
                 The Jinja template to use for formatting the conversation. If not provided, the default chat template
@@ -555,7 +551,7 @@ def apply_chat_template(
             tokenize (`bool`, *optional*, defaults to `False`):
                 Whether to tokenize the output or not.
             **kwargs:
-                Additional keyword arguments for the tokenizer's `apply_chat_template` method.
+                Additional keyword arguments
         """
 
         if chat_template is None:

From b1ecb6448478ed42fd1fcfd4f8416acbec171c07 Mon Sep 17 00:00:00 2001
From: Matt <rocketknight1@gmail.com>
Date: Fri, 7 Jun 2024 18:16:44 +0100
Subject: [PATCH 08/12] Add chat templates to LLaVA/LLaVA-next

---
 src/transformers/models/llava/processing_llava.py           | 4 ++--
 src/transformers/models/llava_next/processing_llava_next.py | 4 ++--
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/src/transformers/models/llava/processing_llava.py b/src/transformers/models/llava/processing_llava.py
index 7016cd500969..cd0459128f6f 100644
--- a/src/transformers/models/llava/processing_llava.py
+++ b/src/transformers/models/llava/processing_llava.py
@@ -43,8 +43,8 @@ class LlavaProcessor(ProcessorMixin):
     image_processor_class = "AutoImageProcessor"
     tokenizer_class = "AutoTokenizer"
 
-    def __init__(self, image_processor=None, tokenizer=None):
-        super().__init__(image_processor, tokenizer)
+    def __init__(self, image_processor=None, tokenizer=None, chat_template=None):
+        super().__init__(image_processor, tokenizer, chat_template=chat_template)
 
     def __call__(
         self,
diff --git a/src/transformers/models/llava_next/processing_llava_next.py b/src/transformers/models/llava_next/processing_llava_next.py
index 91cd544ab648..890326c55ee2 100644
--- a/src/transformers/models/llava_next/processing_llava_next.py
+++ b/src/transformers/models/llava_next/processing_llava_next.py
@@ -43,8 +43,8 @@ class LlavaNextProcessor(ProcessorMixin):
     image_processor_class = "AutoImageProcessor"
     tokenizer_class = "AutoTokenizer"
 
-    def __init__(self, image_processor=None, tokenizer=None):
-        super().__init__(image_processor, tokenizer)
+    def __init__(self, image_processor=None, tokenizer=None, chat_template=None):
+        super().__init__(image_processor, tokenizer, chat_template=chat_template)
 
     def __call__(
         self,

From c440ca5fd7b18ac284a1bd8292651147f91effe0 Mon Sep 17 00:00:00 2001
From: Matt <rocketknight1@gmail.com>
Date: Fri, 7 Jun 2024 22:58:30 +0100
Subject: [PATCH 09/12] Docstring fixup

---
 src/transformers/models/idefics2/processing_idefics2.py     | 3 +--
 src/transformers/models/llava/processing_llava.py           | 2 ++
 src/transformers/models/llava_next/processing_llava_next.py | 2 ++
 3 files changed, 5 insertions(+), 2 deletions(-)

diff --git a/src/transformers/models/idefics2/processing_idefics2.py b/src/transformers/models/idefics2/processing_idefics2.py
index ef48ff050e1e..ce74f55647f0 100644
--- a/src/transformers/models/idefics2/processing_idefics2.py
+++ b/src/transformers/models/idefics2/processing_idefics2.py
@@ -57,8 +57,7 @@ class Idefics2Processor(ProcessorMixin):
             This parameter is used to build the string from the input prompt and image tokens and should match the
             config.perceiver_config.resampler_n_latents value for the model used.
         chat_template (`str`, *optional*): A Jinja template which will be used to convert lists of messages
-            in a chat into a tokenizable string. This argument is optional, and only relevant to processors that support
-            chat inputs.
+            in a chat into a tokenizable string.
     """
 
     attributes = ["image_processor", "tokenizer"]
diff --git a/src/transformers/models/llava/processing_llava.py b/src/transformers/models/llava/processing_llava.py
index cd0459128f6f..96d38c53c947 100644
--- a/src/transformers/models/llava/processing_llava.py
+++ b/src/transformers/models/llava/processing_llava.py
@@ -37,6 +37,8 @@ class LlavaProcessor(ProcessorMixin):
             The image processor is a required input.
         tokenizer ([`LlamaTokenizerFast`], *optional*):
             The tokenizer is a required input.
+        chat_template (`str`, *optional*): A Jinja template which will be used to convert lists of messages
+            in a chat into a tokenizable string.
     """
 
     attributes = ["image_processor", "tokenizer"]
diff --git a/src/transformers/models/llava_next/processing_llava_next.py b/src/transformers/models/llava_next/processing_llava_next.py
index 890326c55ee2..6c2ca2f90284 100644
--- a/src/transformers/models/llava_next/processing_llava_next.py
+++ b/src/transformers/models/llava_next/processing_llava_next.py
@@ -37,6 +37,8 @@ class LlavaNextProcessor(ProcessorMixin):
             The image processor is a required input.
         tokenizer ([`LlamaTokenizerFast`], *optional*):
             The tokenizer is a required input.
+        chat_template (`str`, *optional*): A Jinja template which will be used to convert lists of messages
+            in a chat into a tokenizable string.
     """
 
     attributes = ["image_processor", "tokenizer"]

From b4da1db53ae0bc6ac8e9c9882a06cc322444bf8d Mon Sep 17 00:00:00 2001
From: Matt <rocketknight1@gmail.com>
Date: Mon, 10 Jun 2024 14:06:13 +0100
Subject: [PATCH 10/12] Quick IDEFICS2 fixup

---
 src/transformers/models/idefics2/processing_idefics2.py | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/src/transformers/models/idefics2/processing_idefics2.py b/src/transformers/models/idefics2/processing_idefics2.py
index ce74f55647f0..4edb1813b8e0 100644
--- a/src/transformers/models/idefics2/processing_idefics2.py
+++ b/src/transformers/models/idefics2/processing_idefics2.py
@@ -74,14 +74,13 @@ def __init__(self, image_processor, tokenizer=None, image_seq_len: int = 64, cha
         self.image_token = AddedToken("<image>", normalized=False, special=True)
         self.end_of_utterance_token = AddedToken("<end_of_utterance>", normalized=False, special=True)
         self.image_seq_len = image_seq_len
-        self.chat_template = chat_template
 
         tokens_to_add = {
             "additional_special_tokens": [self.fake_image_token, self.image_token, self.end_of_utterance_token]
         }
         tokenizer.add_special_tokens(tokens_to_add)
 
-        super().__init__(image_processor, tokenizer)
+        super().__init__(image_processor, tokenizer, chat_template=chat_template)
 
     def _extract_images_from_prompts(self, prompts):
         prompt_images = []

From 670c846e601bdd9cf384848fad76c88badef2dd1 Mon Sep 17 00:00:00 2001
From: Matt <rocketknight1@gmail.com>
Date: Mon, 10 Jun 2024 14:13:36 +0100
Subject: [PATCH 11/12] Remove some old references to the Conversation class

---
 src/transformers/processing_utils.py | 6 +-----
 1 file changed, 1 insertion(+), 5 deletions(-)

diff --git a/src/transformers/processing_utils.py b/src/transformers/processing_utils.py
index ce18b7de1f48..26b08a36b485 100644
--- a/src/transformers/processing_utils.py
+++ b/src/transformers/processing_utils.py
@@ -41,10 +41,6 @@
 )
 
 
-if TYPE_CHECKING:
-    from .pipelines.conversational import Conversation
-
-
 logger = logging.get_logger(__name__)
 
 # Dynamically import the Transformers module to grab the attribute classes of the processor form their names.
@@ -533,7 +529,7 @@ def model_input_names(self):
 
     def apply_chat_template(
         self,
-        conversation: Union[List[Dict[str, str]], "Conversation"],
+        conversation: Union[List[Dict[str, str]]],
         chat_template: Optional[str] = None,
         tokenize: bool = False,
         **kwargs,

From 7dbbcfeaeca1f374affc395a8ad6b8ae1d4593ce Mon Sep 17 00:00:00 2001
From: Matt <rocketknight1@gmail.com>
Date: Mon, 10 Jun 2024 14:33:38 +0100
Subject: [PATCH 12/12] make fixup

---
 src/transformers/processing_utils.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/transformers/processing_utils.py b/src/transformers/processing_utils.py
index 26b08a36b485..a21d265b9d1b 100644
--- a/src/transformers/processing_utils.py
+++ b/src/transformers/processing_utils.py
@@ -22,7 +22,7 @@
 import os
 import warnings
 from pathlib import Path
-from typing import TYPE_CHECKING, Any, Dict, List, Optional, Tuple, Union
+from typing import Any, Dict, List, Optional, Tuple, Union
 
 from .dynamic_module_utils import custom_object_save
 from .tokenization_utils_base import PreTrainedTokenizerBase