From d10bc1c6a052f8263dbf059874348d972b290f71 Mon Sep 17 00:00:00 2001 From: Matt Date: Fri, 10 May 2024 15:09:40 +0100 Subject: [PATCH 01/12] Let's try moving chat templates out of IDEFICS and into the generic ProcessorMixin --- .../models/idefics2/processing_idefics2.py | 48 +---------------- src/transformers/processing_utils.py | 51 ++++++++++++++++++- 2 files changed, 50 insertions(+), 49 deletions(-) diff --git a/src/transformers/models/idefics2/processing_idefics2.py b/src/transformers/models/idefics2/processing_idefics2.py index e9f9f9233736..447068607ec3 100644 --- a/src/transformers/models/idefics2/processing_idefics2.py +++ b/src/transformers/models/idefics2/processing_idefics2.py @@ -16,7 +16,7 @@ Processor class for IDEFICS2. """ -from typing import TYPE_CHECKING, Dict, List, Optional, Union +from typing import TYPE_CHECKING, List, Optional, Union from ...feature_extraction_utils import BatchFeature from ...image_utils import ImageInput, is_valid_image, load_image @@ -78,9 +78,6 @@ def __init__(self, image_processor, tokenizer=None, image_seq_len: int = 64, **k } tokenizer.add_special_tokens(tokens_to_add) - # Stores a Jinja template that formats chat histories into tokenizable strings - self.chat_template = kwargs.pop("chat_template", None) - super().__init__(image_processor, tokenizer) def _extract_images_from_prompts(self, prompts): @@ -252,49 +249,6 @@ def model_input_names(self): image_processor_input_names = self.image_processor.model_input_names return list(dict.fromkeys(tokenizer_input_names + image_processor_input_names)) - def apply_chat_template( - self, - conversation: Union[List[Dict[str, str]]], - chat_template: Optional[str] = None, - tokenize: bool = False, - **kwargs, - ) -> str: - """ - Overrides the tokenizer's `apply_chat_template` method to apply the IDEFICS2 chat template by default - if no chat template is provided. - - By default, the output isn't tokenized. This is because the IDEFICS2 chat template is designed to insert - the image token into the sequence according to the message, but does not handle expanding the image - tokens to the sequence length or adding the surrounding tokens e.g. . - - Args: - conversation (`Union[List[Dict, str, str]]`): - The conversation to format. - chat_template (`Optional[str]`, *optional*): - The Jinja template to use for formatting the conversation. If not provided, the default chat template - is used. - tokenize (`bool`, *optional*, defaults to `False`): - Whether to tokenize the output or not. - **kwargs: - Additional keyword arguments for the tokenizer's `apply_chat_template` method. - """ - - if chat_template is None: - if self.chat_template is not None: - chat_template = self.chat_template - else: - logger.warning_once( - "No chat template is set for this processor, falling back to a default class-level template. This is " - "very error-prone, because models are often trained with templates different from the class default! " - "Default chat templates are a legacy feature and will be removed in Transformers v4.43, at which " - "point any code depending on them will stop working. We recommend setting a valid chat template before " - "then to ensure that this model continues working without issues." - ) - chat_template = self.default_chat_template - return self.tokenizer.apply_chat_template( - conversation, chat_template=chat_template, tokenize=tokenize, **kwargs - ) - @property def default_chat_template(self): """ diff --git a/src/transformers/processing_utils.py b/src/transformers/processing_utils.py index d76fa4dccccf..3f707f4d75e7 100644 --- a/src/transformers/processing_utils.py +++ b/src/transformers/processing_utils.py @@ -22,7 +22,7 @@ import os import warnings from pathlib import Path -from typing import Any, Dict, Optional, Tuple, Union +from typing import TYPE_CHECKING, Any, Dict, List, Optional, Tuple, Union from .dynamic_module_utils import custom_object_save from .tokenization_utils_base import PreTrainedTokenizerBase @@ -41,6 +41,10 @@ ) +if TYPE_CHECKING: + from .pipelines.conversational import Conversation + + logger = logging.get_logger(__name__) # Dynamically import the Transformers module to grab the attribute classes of the processor form their names. @@ -59,7 +63,7 @@ class ProcessorMixin(PushToHubMixin): This is a mixin used to provide saving/loading functionality for all processor classes. """ - attributes = ["feature_extractor", "tokenizer"] + attributes = ["feature_extractor", "tokenizer", "chat_template"] # Names need to be attr_class for attr in attributes feature_extractor_class = None tokenizer_class = None @@ -522,6 +526,49 @@ def model_input_names(self): first_attribute = getattr(self, self.attributes[0]) return getattr(first_attribute, "model_input_names", None) + def apply_chat_template( + self, + conversation: Union[List[Dict[str, str]], "Conversation"], + chat_template: Optional[str] = None, + tokenize: bool = False, + **kwargs, + ) -> str: + """ + Overrides the tokenizer's `apply_chat_template` method to apply the IDEFICS2 chat template by default + if no chat template is provided. + + By default, the output isn't tokenized. This is because the IDEFICS2 chat template is designed to insert + the image token into the sequence according to the message, but does not handle expanding the image + tokens to the sequence length or adding the surrounding tokens e.g. . + + Args: + conversation (`Union[List[Dict, str, str], "Conversation"]`): + The conversation to format. + chat_template (`Optional[str]`, *optional*): + The Jinja template to use for formatting the conversation. If not provided, the default chat template + is used. + tokenize (`bool`, *optional*, defaults to `False`): + Whether to tokenize the output or not. + **kwargs: + Additional keyword arguments for the tokenizer's `apply_chat_template` method. + """ + + if chat_template is None: + if self.chat_template is not None: + chat_template = self.chat_template + else: + logger.warning_once( + "No chat template is set for this processor, falling back to a default class-level template. This is " + "very error-prone, because models are often trained with templates different from the class default! " + "Default chat templates are a legacy feature and will be removed in Transformers v4.43, at which " + "point any code depending on them will stop working. We recommend setting a valid chat template before " + "then to ensure that this model continues working without issues." + ) + chat_template = self.default_chat_template + return self.tokenizer.apply_chat_template( + conversation, chat_template=chat_template, tokenize=tokenize, **kwargs + ) + ProcessorMixin.push_to_hub = copy_func(ProcessorMixin.push_to_hub) if ProcessorMixin.push_to_hub.__doc__ is not None: From 8fa0e8cb1632a1b3ecebfefd53b24bd554df0d6b Mon Sep 17 00:00:00 2001 From: Matt Date: Fri, 10 May 2024 15:15:20 +0100 Subject: [PATCH 02/12] Chat templates should not be mandatory --- src/transformers/processing_utils.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/src/transformers/processing_utils.py b/src/transformers/processing_utils.py index 3f707f4d75e7..3494908210c3 100644 --- a/src/transformers/processing_utils.py +++ b/src/transformers/processing_utils.py @@ -63,7 +63,8 @@ class ProcessorMixin(PushToHubMixin): This is a mixin used to provide saving/loading functionality for all processor classes. """ - attributes = ["feature_extractor", "tokenizer", "chat_template"] + attributes = ["feature_extractor", "tokenizer"] + optional_attributes = ["chat_template"] # Names need to be attr_class for attr in attributes feature_extractor_class = None tokenizer_class = None @@ -71,6 +72,9 @@ class ProcessorMixin(PushToHubMixin): # args have to match the attributes class attribute def __init__(self, *args, **kwargs): + # First, extract optional attributes from kwargs if present + for optional_attribute in self.optional_attributes: + setattr(self, optional_attribute, kwargs.pop(optional_attribute, None)) # Sanitize args and kwargs for key in kwargs: if key not in self.attributes: From 445c77ff7cd3406456b66c56bf8845b53e07bf1b Mon Sep 17 00:00:00 2001 From: Matt Date: Fri, 10 May 2024 15:16:06 +0100 Subject: [PATCH 03/12] Chat templates should not be mandatory --- src/transformers/processing_utils.py | 1 + 1 file changed, 1 insertion(+) diff --git a/src/transformers/processing_utils.py b/src/transformers/processing_utils.py index 3494908210c3..9b30d08be075 100644 --- a/src/transformers/processing_utils.py +++ b/src/transformers/processing_utils.py @@ -73,6 +73,7 @@ class ProcessorMixin(PushToHubMixin): # args have to match the attributes class attribute def __init__(self, *args, **kwargs): # First, extract optional attributes from kwargs if present + # Optional attributes can never be positional arguments for optional_attribute in self.optional_attributes: setattr(self, optional_attribute, kwargs.pop(optional_attribute, None)) # Sanitize args and kwargs From a16b53a2d18ed22b04acaf3a07dd58cb366555b1 Mon Sep 17 00:00:00 2001 From: Matt Date: Fri, 10 May 2024 15:33:01 +0100 Subject: [PATCH 04/12] Not all classes will have default chat templates --- src/transformers/processing_utils.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/src/transformers/processing_utils.py b/src/transformers/processing_utils.py index 9b30d08be075..e7d8ee5ee465 100644 --- a/src/transformers/processing_utils.py +++ b/src/transformers/processing_utils.py @@ -561,7 +561,7 @@ def apply_chat_template( if chat_template is None: if self.chat_template is not None: chat_template = self.chat_template - else: + elif getattr(self, "default_chat_template", None) is not None: logger.warning_once( "No chat template is set for this processor, falling back to a default class-level template. This is " "very error-prone, because models are often trained with templates different from the class default! " @@ -570,6 +570,12 @@ def apply_chat_template( "then to ensure that this model continues working without issues." ) chat_template = self.default_chat_template + else: + raise ValueError( + "No chat template is set for this processor. Please either set the `chat_template` attribute, " + "or provide a chat template as an argument. See " + "https://huggingface.co/docs/transformers/main/en/chat_templating for more information." + ) return self.tokenizer.apply_chat_template( conversation, chat_template=chat_template, tokenize=tokenize, **kwargs ) From ce831e6ceb7594182037cf52150456ad9fd9c451 Mon Sep 17 00:00:00 2001 From: Matt Date: Mon, 13 May 2024 16:53:01 +0100 Subject: [PATCH 05/12] stash commit --- src/transformers/models/idefics2/processing_idefics2.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/transformers/models/idefics2/processing_idefics2.py b/src/transformers/models/idefics2/processing_idefics2.py index 447068607ec3..c87e3f119436 100644 --- a/src/transformers/models/idefics2/processing_idefics2.py +++ b/src/transformers/models/idefics2/processing_idefics2.py @@ -62,7 +62,7 @@ class Idefics2Processor(ProcessorMixin): image_processor_class = "Idefics2ImageProcessor" tokenizer_class = "AutoTokenizer" - def __init__(self, image_processor, tokenizer=None, image_seq_len: int = 64, **kwargs): + def __init__(self, image_processor, tokenizer=None, image_seq_len: int = 64, chat_template: str = None, **kwargs): if image_processor is None: raise ValueError("You need to specify an `image_processor`.") if tokenizer is None: @@ -72,6 +72,7 @@ def __init__(self, image_processor, tokenizer=None, image_seq_len: int = 64, **k self.image_token = AddedToken("", normalized=False, special=True) self.end_of_utterance_token = AddedToken("", normalized=False, special=True) self.image_seq_len = image_seq_len + self.chat_template = chat_template tokens_to_add = { "additional_special_tokens": [self.fake_image_token, self.image_token, self.end_of_utterance_token] From ca685a27e8b7b5207e7bfec1194b92acf1cf78f8 Mon Sep 17 00:00:00 2001 From: Matt Date: Fri, 7 Jun 2024 17:16:54 +0100 Subject: [PATCH 06/12] Add chat template docstring --- src/transformers/models/idefics2/processing_idefics2.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/transformers/models/idefics2/processing_idefics2.py b/src/transformers/models/idefics2/processing_idefics2.py index c87e3f119436..ef48ff050e1e 100644 --- a/src/transformers/models/idefics2/processing_idefics2.py +++ b/src/transformers/models/idefics2/processing_idefics2.py @@ -56,6 +56,9 @@ class Idefics2Processor(ProcessorMixin): The length of the image sequence i.e. the number of tokens per image in the input. This parameter is used to build the string from the input prompt and image tokens and should match the config.perceiver_config.resampler_n_latents value for the model used. + chat_template (`str`, *optional*): A Jinja template which will be used to convert lists of messages + in a chat into a tokenizable string. This argument is optional, and only relevant to processors that support + chat inputs. """ attributes = ["image_processor", "tokenizer"] From 804636e721b0bfb16a2ffa46b1ada04a273b6626 Mon Sep 17 00:00:00 2001 From: Matt Date: Fri, 7 Jun 2024 17:35:34 +0100 Subject: [PATCH 07/12] Clean up docstring --- src/transformers/processing_utils.py | 12 ++++-------- 1 file changed, 4 insertions(+), 8 deletions(-) diff --git a/src/transformers/processing_utils.py b/src/transformers/processing_utils.py index e7d8ee5ee465..ce18b7de1f48 100644 --- a/src/transformers/processing_utils.py +++ b/src/transformers/processing_utils.py @@ -539,15 +539,11 @@ def apply_chat_template( **kwargs, ) -> str: """ - Overrides the tokenizer's `apply_chat_template` method to apply the IDEFICS2 chat template by default - if no chat template is provided. - - By default, the output isn't tokenized. This is because the IDEFICS2 chat template is designed to insert - the image token into the sequence according to the message, but does not handle expanding the image - tokens to the sequence length or adding the surrounding tokens e.g. . + Similar to the `apply_chat_template` method on tokenizers, this method applies a Jinja template to input + conversations to turn them into a single tokenizable string. Args: - conversation (`Union[List[Dict, str, str], "Conversation"]`): + conversation (`List[Dict, str, str]`): The conversation to format. chat_template (`Optional[str]`, *optional*): The Jinja template to use for formatting the conversation. If not provided, the default chat template @@ -555,7 +551,7 @@ def apply_chat_template( tokenize (`bool`, *optional*, defaults to `False`): Whether to tokenize the output or not. **kwargs: - Additional keyword arguments for the tokenizer's `apply_chat_template` method. + Additional keyword arguments """ if chat_template is None: From b1ecb6448478ed42fd1fcfd4f8416acbec171c07 Mon Sep 17 00:00:00 2001 From: Matt Date: Fri, 7 Jun 2024 18:16:44 +0100 Subject: [PATCH 08/12] Add chat templates to LLaVA/LLaVA-next --- src/transformers/models/llava/processing_llava.py | 4 ++-- src/transformers/models/llava_next/processing_llava_next.py | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/src/transformers/models/llava/processing_llava.py b/src/transformers/models/llava/processing_llava.py index 7016cd500969..cd0459128f6f 100644 --- a/src/transformers/models/llava/processing_llava.py +++ b/src/transformers/models/llava/processing_llava.py @@ -43,8 +43,8 @@ class LlavaProcessor(ProcessorMixin): image_processor_class = "AutoImageProcessor" tokenizer_class = "AutoTokenizer" - def __init__(self, image_processor=None, tokenizer=None): - super().__init__(image_processor, tokenizer) + def __init__(self, image_processor=None, tokenizer=None, chat_template=None): + super().__init__(image_processor, tokenizer, chat_template=chat_template) def __call__( self, diff --git a/src/transformers/models/llava_next/processing_llava_next.py b/src/transformers/models/llava_next/processing_llava_next.py index 91cd544ab648..890326c55ee2 100644 --- a/src/transformers/models/llava_next/processing_llava_next.py +++ b/src/transformers/models/llava_next/processing_llava_next.py @@ -43,8 +43,8 @@ class LlavaNextProcessor(ProcessorMixin): image_processor_class = "AutoImageProcessor" tokenizer_class = "AutoTokenizer" - def __init__(self, image_processor=None, tokenizer=None): - super().__init__(image_processor, tokenizer) + def __init__(self, image_processor=None, tokenizer=None, chat_template=None): + super().__init__(image_processor, tokenizer, chat_template=chat_template) def __call__( self, From c440ca5fd7b18ac284a1bd8292651147f91effe0 Mon Sep 17 00:00:00 2001 From: Matt Date: Fri, 7 Jun 2024 22:58:30 +0100 Subject: [PATCH 09/12] Docstring fixup --- src/transformers/models/idefics2/processing_idefics2.py | 3 +-- src/transformers/models/llava/processing_llava.py | 2 ++ src/transformers/models/llava_next/processing_llava_next.py | 2 ++ 3 files changed, 5 insertions(+), 2 deletions(-) diff --git a/src/transformers/models/idefics2/processing_idefics2.py b/src/transformers/models/idefics2/processing_idefics2.py index ef48ff050e1e..ce74f55647f0 100644 --- a/src/transformers/models/idefics2/processing_idefics2.py +++ b/src/transformers/models/idefics2/processing_idefics2.py @@ -57,8 +57,7 @@ class Idefics2Processor(ProcessorMixin): This parameter is used to build the string from the input prompt and image tokens and should match the config.perceiver_config.resampler_n_latents value for the model used. chat_template (`str`, *optional*): A Jinja template which will be used to convert lists of messages - in a chat into a tokenizable string. This argument is optional, and only relevant to processors that support - chat inputs. + in a chat into a tokenizable string. """ attributes = ["image_processor", "tokenizer"] diff --git a/src/transformers/models/llava/processing_llava.py b/src/transformers/models/llava/processing_llava.py index cd0459128f6f..96d38c53c947 100644 --- a/src/transformers/models/llava/processing_llava.py +++ b/src/transformers/models/llava/processing_llava.py @@ -37,6 +37,8 @@ class LlavaProcessor(ProcessorMixin): The image processor is a required input. tokenizer ([`LlamaTokenizerFast`], *optional*): The tokenizer is a required input. + chat_template (`str`, *optional*): A Jinja template which will be used to convert lists of messages + in a chat into a tokenizable string. """ attributes = ["image_processor", "tokenizer"] diff --git a/src/transformers/models/llava_next/processing_llava_next.py b/src/transformers/models/llava_next/processing_llava_next.py index 890326c55ee2..6c2ca2f90284 100644 --- a/src/transformers/models/llava_next/processing_llava_next.py +++ b/src/transformers/models/llava_next/processing_llava_next.py @@ -37,6 +37,8 @@ class LlavaNextProcessor(ProcessorMixin): The image processor is a required input. tokenizer ([`LlamaTokenizerFast`], *optional*): The tokenizer is a required input. + chat_template (`str`, *optional*): A Jinja template which will be used to convert lists of messages + in a chat into a tokenizable string. """ attributes = ["image_processor", "tokenizer"] From b4da1db53ae0bc6ac8e9c9882a06cc322444bf8d Mon Sep 17 00:00:00 2001 From: Matt Date: Mon, 10 Jun 2024 14:06:13 +0100 Subject: [PATCH 10/12] Quick IDEFICS2 fixup --- src/transformers/models/idefics2/processing_idefics2.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/transformers/models/idefics2/processing_idefics2.py b/src/transformers/models/idefics2/processing_idefics2.py index ce74f55647f0..4edb1813b8e0 100644 --- a/src/transformers/models/idefics2/processing_idefics2.py +++ b/src/transformers/models/idefics2/processing_idefics2.py @@ -74,14 +74,13 @@ def __init__(self, image_processor, tokenizer=None, image_seq_len: int = 64, cha self.image_token = AddedToken("", normalized=False, special=True) self.end_of_utterance_token = AddedToken("", normalized=False, special=True) self.image_seq_len = image_seq_len - self.chat_template = chat_template tokens_to_add = { "additional_special_tokens": [self.fake_image_token, self.image_token, self.end_of_utterance_token] } tokenizer.add_special_tokens(tokens_to_add) - super().__init__(image_processor, tokenizer) + super().__init__(image_processor, tokenizer, chat_template=chat_template) def _extract_images_from_prompts(self, prompts): prompt_images = [] From 670c846e601bdd9cf384848fad76c88badef2dd1 Mon Sep 17 00:00:00 2001 From: Matt Date: Mon, 10 Jun 2024 14:13:36 +0100 Subject: [PATCH 11/12] Remove some old references to the Conversation class --- src/transformers/processing_utils.py | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/src/transformers/processing_utils.py b/src/transformers/processing_utils.py index ce18b7de1f48..26b08a36b485 100644 --- a/src/transformers/processing_utils.py +++ b/src/transformers/processing_utils.py @@ -41,10 +41,6 @@ ) -if TYPE_CHECKING: - from .pipelines.conversational import Conversation - - logger = logging.get_logger(__name__) # Dynamically import the Transformers module to grab the attribute classes of the processor form their names. @@ -533,7 +529,7 @@ def model_input_names(self): def apply_chat_template( self, - conversation: Union[List[Dict[str, str]], "Conversation"], + conversation: Union[List[Dict[str, str]]], chat_template: Optional[str] = None, tokenize: bool = False, **kwargs, From 7dbbcfeaeca1f374affc395a8ad6b8ae1d4593ce Mon Sep 17 00:00:00 2001 From: Matt Date: Mon, 10 Jun 2024 14:33:38 +0100 Subject: [PATCH 12/12] make fixup --- src/transformers/processing_utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/transformers/processing_utils.py b/src/transformers/processing_utils.py index 26b08a36b485..a21d265b9d1b 100644 --- a/src/transformers/processing_utils.py +++ b/src/transformers/processing_utils.py @@ -22,7 +22,7 @@ import os import warnings from pathlib import Path -from typing import TYPE_CHECKING, Any, Dict, List, Optional, Tuple, Union +from typing import Any, Dict, List, Optional, Tuple, Union from .dynamic_module_utils import custom_object_save from .tokenization_utils_base import PreTrainedTokenizerBase