From e31e466188dfeb19cb80254c7c7f691a98e4afb3 Mon Sep 17 00:00:00 2001 From: Matt Date: Wed, 15 Oct 2025 15:58:27 +0100 Subject: [PATCH 01/11] Flip the default return type for `apply_chat_template` to match the underlying tokenizer --- src/transformers/tokenization_utils_base.py | 13 +++++-------- 1 file changed, 5 insertions(+), 8 deletions(-) diff --git a/src/transformers/tokenization_utils_base.py b/src/transformers/tokenization_utils_base.py index 5d0dc48e5aee..c84e51d4011b 100644 --- a/src/transformers/tokenization_utils_base.py +++ b/src/transformers/tokenization_utils_base.py @@ -3192,7 +3192,7 @@ def apply_chat_template( truncation: bool = False, max_length: Optional[int] = None, return_tensors: Optional[Union[str, TensorType]] = None, - return_dict: bool = False, + return_dict: bool = True, return_assistant_tokens_mask: bool = False, tokenizer_kwargs: Optional[dict[str, Any]] = None, **kwargs, @@ -3265,14 +3265,11 @@ def apply_chat_template( set, will return a dict of tokenizer outputs instead. """ - if return_dict and not tokenize: - raise ValueError( - "`return_dict=True` is incompatible with `tokenize=False`, because there is no dict " - "of tokenizer outputs to return." - ) + if not tokenize: + return_dict = False # dicts are only returned by the tokenizer anyway - if return_assistant_tokens_mask and not return_dict: - raise ValueError("`return_assistant_tokens_mask=True` is incompatible with `return_dict=False`") + if return_assistant_tokens_mask and not (return_dict and tokenize): + raise ValueError("`return_assistant_tokens_mask=True` requires `return_dict=True` and `tokenize=True`") if tokenizer_kwargs is None: tokenizer_kwargs = {} From df855fe2b5bd49a5de82c7b9a94576b4f37c5fd1 Mon Sep 17 00:00:00 2001 From: Matt Date: Wed, 15 Oct 2025 16:12:29 +0100 Subject: [PATCH 02/11] Remove test_tokenization_for_chat tests, which no longer do anything useful --- tests/tokenization/test_tokenization_utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/tokenization/test_tokenization_utils.py b/tests/tokenization/test_tokenization_utils.py index c8e5008813c1..39fb75269604 100644 --- a/tests/tokenization/test_tokenization_utils.py +++ b/tests/tokenization/test_tokenization_utils.py @@ -329,4 +329,4 @@ def test_encode_message_raises_on_add_generation_prompt(self): {"role": "user", "content": "Hey there, how are you?"}, ] with self.assertRaises(ValueError): - tokenizer.encode_message_with_chat_template(conversation[0], add_generation_prompt=True) + tokenizer.encode_message_with_chat_template(conversation[0], add_generation_prompt=True, return_dict=False) From 52f00286cb85863002f65fd761e13b16b092ea97 Mon Sep 17 00:00:00 2001 From: Matt Date: Wed, 15 Oct 2025 16:23:53 +0100 Subject: [PATCH 03/11] Remove test_tokenization_for_chat tests, which no longer do anything useful --- tests/models/gpt_sw3/test_tokenization_gpt_sw3.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/models/gpt_sw3/test_tokenization_gpt_sw3.py b/tests/models/gpt_sw3/test_tokenization_gpt_sw3.py index 6d3fd89a91ea..86e8bcabcbf2 100644 --- a/tests/models/gpt_sw3/test_tokenization_gpt_sw3.py +++ b/tests/models/gpt_sw3/test_tokenization_gpt_sw3.py @@ -15,7 +15,7 @@ import unittest from transformers import GPTSw3Tokenizer -from transformers.testing_utils import get_tests_dir, require_jinja, require_sentencepiece, require_tokenizers, slow +from transformers.testing_utils import get_tests_dir, require_sentencepiece, require_tokenizers, slow from ...test_tokenization_common import TokenizerTesterMixin From caf7303828436a31c0c1dccae6a72fe8daaeb283 Mon Sep 17 00:00:00 2001 From: Matt Date: Wed, 15 Oct 2025 16:53:45 +0100 Subject: [PATCH 04/11] Fix test_encode_message tests --- src/transformers/tokenization_utils_base.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/transformers/tokenization_utils_base.py b/src/transformers/tokenization_utils_base.py index c84e51d4011b..6da4f4d7b7d0 100644 --- a/src/transformers/tokenization_utils_base.py +++ b/src/transformers/tokenization_utils_base.py @@ -3390,7 +3390,7 @@ def encode_message_with_chat_template( tokens = self.apply_chat_template(conversation, add_generation_prompt=False, tokenize=True, **kwargs) prefix_tokens = self.apply_chat_template( - conversation_history, add_generation_prompt=False, tokenize=True, **kwargs + conversation_history, add_generation_prompt=False, tokenize=True, return_dict=False, **kwargs ) # It's possible that the prefix tokens are not a prefix of the full list of tokens. # For example, if the prefix is `User: Hi` and the full conversation is `User: HiAssistant: Hello`. From 51f85bd4cac5746d5ae77ae5461b542550f9f9c9 Mon Sep 17 00:00:00 2001 From: Matt Date: Wed, 15 Oct 2025 17:12:17 +0100 Subject: [PATCH 05/11] Fix test_encode_message tests --- src/transformers/tokenization_utils_base.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/src/transformers/tokenization_utils_base.py b/src/transformers/tokenization_utils_base.py index 6da4f4d7b7d0..65bab658c8b6 100644 --- a/src/transformers/tokenization_utils_base.py +++ b/src/transformers/tokenization_utils_base.py @@ -3384,10 +3384,14 @@ def encode_message_with_chat_template( ) if conversation_history is None or len(conversation_history) == 0: - return self.apply_chat_template([message], add_generation_prompt=False, tokenize=True, **kwargs) + return self.apply_chat_template( + [message], add_generation_prompt=False, tokenize=True, return_dict=False, **kwargs + ) conversation = conversation_history + [message] - tokens = self.apply_chat_template(conversation, add_generation_prompt=False, tokenize=True, **kwargs) + tokens = self.apply_chat_template( + conversation, add_generation_prompt=False, tokenize=True, return_dict=False, **kwargs + ) prefix_tokens = self.apply_chat_template( conversation_history, add_generation_prompt=False, tokenize=True, return_dict=False, **kwargs From 4117af18ed94e6c4648cf78c8419a2a2b736d71f Mon Sep 17 00:00:00 2001 From: Matt Date: Thu, 30 Oct 2025 16:59:27 +0000 Subject: [PATCH 06/11] nit fix --- tests/tokenization/test_tokenization_utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/tokenization/test_tokenization_utils.py b/tests/tokenization/test_tokenization_utils.py index 39fb75269604..c8e5008813c1 100644 --- a/tests/tokenization/test_tokenization_utils.py +++ b/tests/tokenization/test_tokenization_utils.py @@ -329,4 +329,4 @@ def test_encode_message_raises_on_add_generation_prompt(self): {"role": "user", "content": "Hey there, how are you?"}, ] with self.assertRaises(ValueError): - tokenizer.encode_message_with_chat_template(conversation[0], add_generation_prompt=True, return_dict=False) + tokenizer.encode_message_with_chat_template(conversation[0], add_generation_prompt=True) From 9ac45800e71c259786f0c9c3d1b9a3c310cc823d Mon Sep 17 00:00:00 2001 From: Matt Date: Thu, 30 Oct 2025 17:30:49 +0000 Subject: [PATCH 07/11] Trigger tests From 3d595c69e472958536dc62cef409389ef63f7d6e Mon Sep 17 00:00:00 2001 From: Matt Date: Tue, 2 Dec 2025 16:45:22 +0000 Subject: [PATCH 08/11] Remove test_tokenization_for_chat --- .../test_tokenization_blenderbot.py | 19 ----------- tests/models/bloom/test_tokenization_bloom.py | 21 ------------ .../models/cohere/test_tokenization_cohere.py | 25 -------------- tests/models/gpt2/test_tokenization_gpt2.py | 20 ----------- .../gpt_sw3/test_tokenization_gpt_sw3.py | 33 ------------------- 5 files changed, 118 deletions(-) diff --git a/tests/models/blenderbot/test_tokenization_blenderbot.py b/tests/models/blenderbot/test_tokenization_blenderbot.py index 8f7c60f2bf2e..05e1ba383485 100644 --- a/tests/models/blenderbot/test_tokenization_blenderbot.py +++ b/tests/models/blenderbot/test_tokenization_blenderbot.py @@ -22,22 +22,3 @@ def test_pretokenized_inputs(self, *args, **kwargs): # with .split() loses the leading spaces, so the tokenization results differ pass - def test_tokenization_for_chat(self): - tok = self.get_tokenizer() - test_chats = [ - [{"role": "system", "content": "You are a helpful chatbot."}, {"role": "user", "content": "Hello!"}], - [ - {"role": "system", "content": "You are a helpful chatbot."}, - {"role": "user", "content": "Hello!"}, - {"role": "assistant", "content": "Nice to meet you."}, - ], - [{"role": "assistant", "content": "Nice to meet you."}, {"role": "user", "content": "Hello!"}], - ] - tokenized_chats = [tok.apply_chat_template(test_chat) for test_chat in test_chats] - expected_tokens = [ - [553, 366, 265, 4792, 3879, 73, 311, 21, 228, 228, 6950, 8, 2], - [553, 366, 265, 4792, 3879, 73, 311, 21, 228, 228, 6950, 8, 228, 3490, 287, 2273, 304, 21, 2], - [3490, 287, 2273, 304, 21, 228, 228, 6950, 8, 2], - ] - for tokenized_chat, expected_tokens in zip(tokenized_chats, expected_tokens): - self.assertListEqual(tokenized_chat, expected_tokens) diff --git a/tests/models/bloom/test_tokenization_bloom.py b/tests/models/bloom/test_tokenization_bloom.py index 267d377ed8e5..48cd71081636 100644 --- a/tests/models/bloom/test_tokenization_bloom.py +++ b/tests/models/bloom/test_tokenization_bloom.py @@ -129,27 +129,6 @@ def test_encodings_from_xnli_dataset(self): predicted_text = [tokenizer.decode(x, clean_up_tokenization_spaces=False) for x in output_tokens] self.assertListEqual(predicted_text, input_text) - @require_jinja - def test_tokenization_for_chat(self): - tokenizer = self.get_tokenizer() - tokenizer.chat_template = "{% for message in messages %}{{ message.content }}{{ eos_token }}{% endfor %}" - test_chats = [ - [{"role": "system", "content": "You are a helpful chatbot."}, {"role": "user", "content": "Hello!"}], - [ - {"role": "system", "content": "You are a helpful chatbot."}, - {"role": "user", "content": "Hello!"}, - {"role": "assistant", "content": "Nice to meet you."}, - ], - [{"role": "assistant", "content": "Nice to meet you."}, {"role": "user", "content": "Hello!"}], - ] - tokenized_chats = [tokenizer.apply_chat_template(test_chat) for test_chat in test_chats] - expected_tokens = [ - [5448, 1306, 267, 66799, 44799, 37143, 17, 2, 59414, 4, 2], - [5448, 1306, 267, 66799, 44799, 37143, 17, 2, 59414, 4, 2, 229126, 427, 11890, 1152, 17, 2], - [229126, 427, 11890, 1152, 17, 2, 59414, 4, 2], - ] - for tokenized_chat, expected_tokens in zip(tokenized_chats, expected_tokens): - self.assertListEqual(tokenized_chat, expected_tokens) def test_add_prefix_space_fast(self): tokenizer_w_prefix = self.get_tokenizer(add_prefix_space=True) diff --git a/tests/models/cohere/test_tokenization_cohere.py b/tests/models/cohere/test_tokenization_cohere.py index b428c4fa9bca..598dc9ccb188 100644 --- a/tests/models/cohere/test_tokenization_cohere.py +++ b/tests/models/cohere/test_tokenization_cohere.py @@ -73,31 +73,6 @@ def test_pretrained_model_lists(self): self.assertGreaterEqual(len(self.tokenizer_class.pretrained_vocab_files_map), 1) self.assertGreaterEqual(len(list(self.tokenizer_class.pretrained_vocab_files_map.values())[0]), 1) - @require_jinja - def test_tokenization_for_chat(self): - tokenizer = self.get_tokenizer() - test_chats = [ - [{"role": "system", "content": "You are a helpful chatbot."}, {"role": "user", "content": "Hello!"}], - [ - {"role": "system", "content": "You are a helpful chatbot."}, - {"role": "user", "content": "Hello!"}, - {"role": "assistant", "content": "Nice to meet you."}, - ], - ] - tokenized_chats = [tokenizer.apply_chat_template(test_chat) for test_chat in test_chats] - # fmt: off - expected_tokens = [ - [5, 36, 99, 59, 60, 41, 58, 60, 71, 55, 46, 71, 60, 61, 58, 54, 71, 60, 55, 51, 45, 54, 99, 38, 36, 99, 59, 65, 59, 60, 45, 53, 71, 60, 55, 51, 45, 54, 99, 38, 65, 243, 394, 204, 336, 84, 88, 887, 374, 216, 74, 286, 22, 8, 36, 99, 59, 60, 41, 58, 60, 71, 55, 46, 71, 60, 61, 58, 54, 71, 60, 55, 51, 45, 54, 99, 38, 36, 99, 61, 59, 45, 58, 71, 60, 55, 51, 45, 54, 99, 38, 48, 420, 87, 9, 8], - [5, 36, 99, 59, 60, 41, 58, 60, 71, 55, 46, 71, 60, 61, 58, 54, 71, 60, 55, 51, 45, 54, 99, 38, 36, 99, 59, 65, - 59, 60, 45, 53, 71, 60, 55, 51, 45, 54, 99, 38, 65, 243, 394, 204, 336, 84, 88, 887, 374, 216, 74, 286, 22, 8, - 36, 99, 59, 60, 41, 58, 60, 71, 55, 46, 71, 60, 61, 58, 54, 71, 60, 55, 51, 45, 54, 99, 38, 36, 99, 61, 59, - 45, 58, 71, 60, 55, 51, 45, 54, 99, 38, 48, 420, 87, 9, 8, 36, 99, 59, 60, 41, 58, 60, 71, 55, 46, 71, 60, 61, - 58, 54, 71, 60, 55, 51, 45, 54, 99, 38, 36, 99, 43, 48, 41, 60, 42, 55, 60, 71, 60, 55, 51, 45, 54, 99, 38, - 54, 567, 235, 693, 276, 411, 243, 22, 8] - ] - # fmt: on - for tokenized_chat, expected_tokens in zip(tokenized_chats, expected_tokens): - self.assertListEqual(tokenized_chat, expected_tokens) @require_jinja def test_tokenization_for_tool_use(self): diff --git a/tests/models/gpt2/test_tokenization_gpt2.py b/tests/models/gpt2/test_tokenization_gpt2.py index de85bde7666d..3e4fcda0cf9f 100644 --- a/tests/models/gpt2/test_tokenization_gpt2.py +++ b/tests/models/gpt2/test_tokenization_gpt2.py @@ -67,26 +67,6 @@ def test_special_tokens_mask_input_pairs_and_bos_token(self): filtered_sequence = [x for x in filtered_sequence if x is not None] self.assertEqual(encoded_sequence, filtered_sequence) - @require_jinja - def test_tokenization_for_chat(self): - tokenizer = GPT2Tokenizer.from_pretrained(self.tmpdirname) - tokenizer.chat_template = "{% for message in messages %}{{ message.content }}{{ eos_token }}{% endfor %}" - test_chats = [ - [{"role": "system", "content": "You are a helpful chatbot."}, {"role": "user", "content": "Hello!"}], - [ - {"role": "system", "content": "You are a helpful chatbot."}, - {"role": "user", "content": "Hello!"}, - {"role": "assistant", "content": "Nice to meet you."}, - ], - [{"role": "assistant", "content": "Nice to meet you."}, {"role": "user", "content": "Hello!"}], - ] - tokenized_chats = [tokenizer.apply_chat_template(test_chat) for test_chat in test_chats] - # fmt: off - expected_tokens = [[1639, 389, 257, 7613, 8537, 13645, 13, 50256, 15496, 0, 50256], [1639, 389, 257, 7613, 8537, 13645, 13, 50256, 15496, 0, 50256, 35284, 284, 1826, 345, 13, 50256], [35284, 284, 1826, 345, 13, 50256, 15496, 0, 50256]] - # fmt: on - for tokenized_chat, expected_tokens in zip(tokenized_chats, expected_tokens): - self.assertListEqual(tokenized_chat, expected_tokens) - @require_tiktoken def test_tokenization_tiktoken(self): from tiktoken import encoding_name_for_model diff --git a/tests/models/gpt_sw3/test_tokenization_gpt_sw3.py b/tests/models/gpt_sw3/test_tokenization_gpt_sw3.py index 86e8bcabcbf2..7dbcd524e810 100644 --- a/tests/models/gpt_sw3/test_tokenization_gpt_sw3.py +++ b/tests/models/gpt_sw3/test_tokenization_gpt_sw3.py @@ -129,36 +129,3 @@ def test_tokenizer_integration(self): model_name="AI-Sweden-Models/gpt-sw3-126m", sequences=sequences, ) - - @require_jinja - def test_tokenization_for_chat(self): - tokenizer = GPTSw3Tokenizer(SAMPLE_VOCAB, name_or_path="test") - tokenizer.chat_template = ( - "{{ eos_token }}{{ bos_token }}" - "{% for message in messages %}" - "{% if message['role'] == 'user' %}{{ 'User: ' + message['content']}}" - "{% else %}{{ 'Bot: ' + message['content']}}{% endif %}" - "{{ message['text'] }}{{ bos_token }}" - "{% endfor %}" - "Bot:" - ) - # This is in English, but it's just here to make sure the chat control tokens are being added properly - test_chats = [ - [{"role": "system", "content": "You are a helpful chatbot."}, {"role": "user", "content": "Hello!"}], - [ - {"role": "system", "content": "You are a helpful chatbot."}, - {"role": "user", "content": "Hello!"}, - {"role": "assistant", "content": "Nice to meet you."}, - ], - [{"role": "assistant", "content": "Nice to meet you."}, {"role": "user", "content": "Hello!"}], - ] - tokenized_chats = [tokenizer.apply_chat_template(test_chat) for test_chat in test_chats] - # fmt: off - expected_tokens = [ - [2000, 1, 575, 541, 419, 530, 339, 265, 878, 708, 727, 275, 347, 541, 260, 1, 968, 263, 314, 419, 366, 354, 294, 360, 1, 575, 541, 419], - [2000, 1, 575, 541, 419, 530, 339, 265, 878, 708, 727, 275, 347, 541, 260, 1, 968, 263, 314, 419, 366, 354, 294, 360, 1, 575, 541, 419, 984, 429, 281, 264, 1261, 291, 260, 1, 575, 541, 419], - [2000, 1, 575, 541, 419, 984, 429, 281, 264, 1261, 291, 260, 1, 968, 263, 314, 419, 366, 354, 294, 360, 1, 575, 541, 419] - ] - # fmt: on - for tokenized_chat, expected_tokens in zip(tokenized_chats, expected_tokens): - self.assertListEqual(tokenized_chat, expected_tokens) From 67797dce7dca86b4125ff9ffaf68631c4638ef71 Mon Sep 17 00:00:00 2001 From: Matt Date: Tue, 2 Dec 2025 16:45:48 +0000 Subject: [PATCH 09/11] make fixup --- tests/models/blenderbot/test_tokenization_blenderbot.py | 1 - tests/models/bloom/test_tokenization_bloom.py | 3 +-- tests/models/cohere/test_tokenization_cohere.py | 1 - tests/models/gpt2/test_tokenization_gpt2.py | 2 +- 4 files changed, 2 insertions(+), 5 deletions(-) diff --git a/tests/models/blenderbot/test_tokenization_blenderbot.py b/tests/models/blenderbot/test_tokenization_blenderbot.py index 05e1ba383485..37fece070949 100644 --- a/tests/models/blenderbot/test_tokenization_blenderbot.py +++ b/tests/models/blenderbot/test_tokenization_blenderbot.py @@ -21,4 +21,3 @@ def test_pretokenized_inputs(self, *args, **kwargs): # The issue is that when you have a sequence with leading spaces, splitting it # with .split() loses the leading spaces, so the tokenization results differ pass - diff --git a/tests/models/bloom/test_tokenization_bloom.py b/tests/models/bloom/test_tokenization_bloom.py index 48cd71081636..6d0ea31f3f8a 100644 --- a/tests/models/bloom/test_tokenization_bloom.py +++ b/tests/models/bloom/test_tokenization_bloom.py @@ -17,7 +17,7 @@ from datasets import load_dataset from transformers import TokenizersBackend -from transformers.testing_utils import require_jinja, require_tokenizers, slow +from transformers.testing_utils import require_tokenizers, slow from ...test_tokenization_common import TokenizerTesterMixin @@ -129,7 +129,6 @@ def test_encodings_from_xnli_dataset(self): predicted_text = [tokenizer.decode(x, clean_up_tokenization_spaces=False) for x in output_tokens] self.assertListEqual(predicted_text, input_text) - def test_add_prefix_space_fast(self): tokenizer_w_prefix = self.get_tokenizer(add_prefix_space=True) tokenizer_wo_prefix = self.get_tokenizer(add_prefix_space=False) diff --git a/tests/models/cohere/test_tokenization_cohere.py b/tests/models/cohere/test_tokenization_cohere.py index 598dc9ccb188..75de9835fa01 100644 --- a/tests/models/cohere/test_tokenization_cohere.py +++ b/tests/models/cohere/test_tokenization_cohere.py @@ -73,7 +73,6 @@ def test_pretrained_model_lists(self): self.assertGreaterEqual(len(self.tokenizer_class.pretrained_vocab_files_map), 1) self.assertGreaterEqual(len(list(self.tokenizer_class.pretrained_vocab_files_map.values())[0]), 1) - @require_jinja def test_tokenization_for_tool_use(self): tokenizer = self.get_tokenizer() diff --git a/tests/models/gpt2/test_tokenization_gpt2.py b/tests/models/gpt2/test_tokenization_gpt2.py index 3e4fcda0cf9f..8e409064320c 100644 --- a/tests/models/gpt2/test_tokenization_gpt2.py +++ b/tests/models/gpt2/test_tokenization_gpt2.py @@ -16,7 +16,7 @@ import unittest from transformers import AutoTokenizer, GPT2Tokenizer -from transformers.testing_utils import require_jinja, require_tiktoken, require_tokenizers +from transformers.testing_utils import require_tiktoken, require_tokenizers from ...test_tokenization_common import TokenizerTesterMixin From da06bddec428bf778ad20804c1996253e6556c37 Mon Sep 17 00:00:00 2001 From: Matt Date: Tue, 2 Dec 2025 16:55:06 +0000 Subject: [PATCH 10/11] Add a little test to make sure that doesn't happen again --- tests/test_tokenization_common.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/test_tokenization_common.py b/tests/test_tokenization_common.py index 808823c3ff44..a8deadeb79d4 100644 --- a/tests/test_tokenization_common.py +++ b/tests/test_tokenization_common.py @@ -950,7 +950,7 @@ def test_chat_template(self): dummy_conversation, chat_template=dummy_template, tokenize=True, return_dict=False ) dict_output = tokenizer.apply_chat_template( - dummy_conversation, chat_template=dummy_template, tokenize=True, return_dict=True + dummy_conversation, chat_template=dummy_template, tokenize=True # This also checks return_dict is default ) self.assertEqual(dict_output["input_ids"], output) # Test return_dict behaviour matches From a697124e0910652d718473572ed82256ac1c150e Mon Sep 17 00:00:00 2001 From: Matt Date: Tue, 2 Dec 2025 16:56:44 +0000 Subject: [PATCH 11/11] make fixup --- tests/test_tokenization_common.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/tests/test_tokenization_common.py b/tests/test_tokenization_common.py index a8deadeb79d4..673f8def3159 100644 --- a/tests/test_tokenization_common.py +++ b/tests/test_tokenization_common.py @@ -950,7 +950,9 @@ def test_chat_template(self): dummy_conversation, chat_template=dummy_template, tokenize=True, return_dict=False ) dict_output = tokenizer.apply_chat_template( - dummy_conversation, chat_template=dummy_template, tokenize=True # This also checks return_dict is default + dummy_conversation, + chat_template=dummy_template, + tokenize=True, # This also checks return_dict=True is the default ) self.assertEqual(dict_output["input_ids"], output) # Test return_dict behaviour matches