diff --git a/src/transformers/models/voxtral/processing_voxtral.py b/src/transformers/models/voxtral/processing_voxtral.py
index 47fe00bf3e9f..15a086703cf7 100644
--- a/src/transformers/models/voxtral/processing_voxtral.py
+++ b/src/transformers/models/voxtral/processing_voxtral.py
@@ -206,7 +206,7 @@ def apply_chat_template(
         tokenizer_kwargs = {**processed_kwargs["template_kwargs"], **text_kwargs}
         tokenizer_kwargs["return_tensors"] = None  # let's not return tensors here
         tokenize = tokenizer_kwargs.pop("tokenize", False)
-        return_dict = tokenizer_kwargs.pop("return_dict", False)
+        return_dict = tokenizer_kwargs.pop("return_dict", True)

         encoded_instruct_inputs = self.tokenizer.apply_chat_template(
             conversations,
diff --git a/src/transformers/processing_utils.py b/src/transformers/processing_utils.py
index a70018c6cf2e..c3487aca431b 100644
--- a/src/transformers/processing_utils.py
+++ b/src/transformers/processing_utils.py
@@ -1603,7 +1603,7 @@ def apply_chat_template(
             conversations = [conversation]

         tokenize = processed_kwargs["template_kwargs"].pop("tokenize", False)
-        return_dict = processed_kwargs["template_kwargs"].pop("return_dict", False)
+        return_dict = processed_kwargs["template_kwargs"].pop("return_dict", True)
         mm_load_kwargs = processed_kwargs["mm_load_kwargs"]

         if tokenize:
diff --git a/src/transformers/tokenization_mistral_common.py b/src/transformers/tokenization_mistral_common.py
index 3eae0a6f6878..713d0cbc6bc1 100644
--- a/src/transformers/tokenization_mistral_common.py
+++ b/src/transformers/tokenization_mistral_common.py
@@ -1378,7 +1378,7 @@ def apply_chat_template(
         truncation: bool = False,
         max_length: Optional[int] = None,
         return_tensors: Optional[Union[str, TensorType]] = None,
-        return_dict: bool = False,
+        return_dict: bool = True,
         **kwargs,
     ) -> Union[str, list[int], list[str], list[list[int]], BatchEncoding]:
         """
diff --git a/src/transformers/tokenization_utils_base.py b/src/transformers/tokenization_utils_base.py
index afdd8270987a..24228738fcde 100644
--- a/src/transformers/tokenization_utils_base.py
+++ b/src/transformers/tokenization_utils_base.py
@@ -1588,7 +1588,7 @@ def apply_chat_template(
         truncation: bool = False,
         max_length: Optional[int] = None,
         return_tensors: Optional[Union[str, TensorType]] = None,
-        return_dict: bool = False,
+        return_dict: bool = True,
         return_assistant_tokens_mask: bool = False,
         tokenizer_kwargs: Optional[dict[str, Any]] = None,
         **kwargs,
@@ -1661,14 +1661,11 @@ def apply_chat_template(
                 set, will return a dict of tokenizer outputs instead.
         """

-        if return_dict and not tokenize:
-            raise ValueError(
-                "`return_dict=True` is incompatible with `tokenize=False`, because there is no dict "
-                "of tokenizer outputs to return."
-            )
+        if not tokenize:
+            return_dict = False  # dicts are only returned by the tokenizer anyway

-        if return_assistant_tokens_mask and not return_dict:
-            raise ValueError("`return_assistant_tokens_mask=True` is incompatible with `return_dict=False`")
+        if return_assistant_tokens_mask and not (return_dict and tokenize):
+            raise ValueError("`return_assistant_tokens_mask=True` requires `return_dict=True` and `tokenize=True`")

         if tokenizer_kwargs is None:
             tokenizer_kwargs = {}
@@ -1783,13 +1780,17 @@ def encode_message_with_chat_template(
             )

         if conversation_history is None or len(conversation_history) == 0:
-            return self.apply_chat_template([message], add_generation_prompt=False, tokenize=True, **kwargs)
+            return self.apply_chat_template(
+                [message], add_generation_prompt=False, tokenize=True, return_dict=False, **kwargs
+            )

         conversation = conversation_history + [message]
-        tokens = self.apply_chat_template(conversation, add_generation_prompt=False, tokenize=True, **kwargs)
+        tokens = self.apply_chat_template(
+            conversation, add_generation_prompt=False, tokenize=True, return_dict=False, **kwargs
+        )
         prefix_tokens = self.apply_chat_template(
-            conversation_history, add_generation_prompt=False, tokenize=True, **kwargs
+            conversation_history, add_generation_prompt=False, tokenize=True, return_dict=False, **kwargs
         )
         # It's possible that the prefix tokens are not a prefix of the full list of tokens.
         # For example, if the prefix is `User: Hi` and the full conversation is `User: HiAssistant: Hello`.
diff --git a/tests/models/blenderbot/test_tokenization_blenderbot.py b/tests/models/blenderbot/test_tokenization_blenderbot.py
index 0997ac4bde41..da6741940c90 100644
--- a/tests/models/blenderbot/test_tokenization_blenderbot.py
+++ b/tests/models/blenderbot/test_tokenization_blenderbot.py
@@ -18,7 +18,6 @@
 from functools import cached_property

 from transformers import BlenderbotTokenizer, BlenderbotTokenizerFast
-from transformers.testing_utils import require_jinja


 class Blenderbot3BTokenizerTests(unittest.TestCase):
@@ -51,24 +50,3 @@ def test_3B_tokenization_same_as_parlai(self):
     def test_3B_tokenization_same_as_parlai_rust_tokenizer(self):
         assert self.rust_tokenizer_3b.add_prefix_space
         assert self.rust_tokenizer_3b([" Sam", "Sam"]).input_ids == [[5502, 2], [5502, 2]]
-
-    @require_jinja
-    def test_tokenization_for_chat(self):
-        tok = self.tokenizer_3b
-        test_chats = [
-            [{"role": "system", "content": "You are a helpful chatbot."}, {"role": "user", "content": "Hello!"}],
-            [
-                {"role": "system", "content": "You are a helpful chatbot."},
-                {"role": "user", "content": "Hello!"},
-                {"role": "assistant", "content": "Nice to meet you."},
-            ],
-            [{"role": "assistant", "content": "Nice to meet you."}, {"role": "user", "content": "Hello!"}],
-        ]
-        tokenized_chats = [tok.apply_chat_template(test_chat) for test_chat in test_chats]
-        expected_tokens = [
-            [553, 366, 265, 4792, 3879, 73, 311, 21, 228, 228, 6950, 8, 2],
-            [553, 366, 265, 4792, 3879, 73, 311, 21, 228, 228, 6950, 8, 228, 3490, 287, 2273, 304, 21, 2],
-            [3490, 287, 2273, 304, 21, 228, 228, 6950, 8, 2],
-        ]
-        for tokenized_chat, expected_tokens in zip(tokenized_chats, expected_tokens):
-            self.assertListEqual(tokenized_chat, expected_tokens)
diff --git a/tests/models/bloom/test_tokenization_bloom.py b/tests/models/bloom/test_tokenization_bloom.py
index 79d330e40277..4a4840dfd9f3 100644
--- a/tests/models/bloom/test_tokenization_bloom.py
+++ b/tests/models/bloom/test_tokenization_bloom.py
@@ -18,7 +18,7 @@
 from datasets import load_dataset

 from transformers import BloomTokenizerFast
-from transformers.testing_utils import require_jinja, require_tokenizers
+from transformers.testing_utils import require_tokenizers

 from ...test_tokenization_common import TokenizerTesterMixin

@@ -137,28 +137,6 @@ def test_encodings_from_xnli_dataset(self):
         predicted_text = [tokenizer.decode(x, clean_up_tokenization_spaces=False) for x in output_tokens]
         self.assertListEqual(predicted_text, input_text)

-    @require_jinja
-    def test_tokenization_for_chat(self):
-        tokenizer = self.get_rust_tokenizer()
-        tokenizer.chat_template = "{% for message in messages %}{{ message.content }}{{ eos_token }}{% endfor %}"
-        test_chats = [
-            [{"role": "system", "content": "You are a helpful chatbot."}, {"role": "user", "content": "Hello!"}],
-            [
-                {"role": "system", "content": "You are a helpful chatbot."},
-                {"role": "user", "content": "Hello!"},
-                {"role": "assistant", "content": "Nice to meet you."},
-            ],
-            [{"role": "assistant", "content": "Nice to meet you."}, {"role": "user", "content": "Hello!"}],
-        ]
-        tokenized_chats = [tokenizer.apply_chat_template(test_chat) for test_chat in test_chats]
-        expected_tokens = [
-            [5448, 1306, 267, 66799, 44799, 37143, 17, 2, 59414, 4, 2],
-            [5448, 1306, 267, 66799, 44799, 37143, 17, 2, 59414, 4, 2, 229126, 427, 11890, 1152, 17, 2],
-            [229126, 427, 11890, 1152, 17, 2, 59414, 4, 2],
-        ]
-        for tokenized_chat, expected_tokens in zip(tokenized_chats, expected_tokens):
-            self.assertListEqual(tokenized_chat, expected_tokens)
-
     def test_add_prefix_space_fast(self):
         tokenizer_w_prefix = self.get_rust_tokenizer(add_prefix_space=True)
         tokenizer_wo_prefix = self.get_rust_tokenizer(add_prefix_space=False)
diff --git a/tests/models/cohere/test_tokenization_cohere.py b/tests/models/cohere/test_tokenization_cohere.py
index 73a0942522ac..ce56bbeb6a84 100644
--- a/tests/models/cohere/test_tokenization_cohere.py
+++ b/tests/models/cohere/test_tokenization_cohere.py
@@ -146,32 +146,6 @@ def test_pretrained_model_lists(self):
         self.assertGreaterEqual(len(self.tokenizer_class.pretrained_vocab_files_map), 1)
         self.assertGreaterEqual(len(list(self.tokenizer_class.pretrained_vocab_files_map.values())[0]), 1)

-    @require_jinja
-    def test_tokenization_for_chat(self):
-        tokenizer = self.get_rust_tokenizer()
-        test_chats = [
-            [{"role": "system", "content": "You are a helpful chatbot."}, {"role": "user", "content": "Hello!"}],
-            [
-                {"role": "system", "content": "You are a helpful chatbot."},
-                {"role": "user", "content": "Hello!"},
-                {"role": "assistant", "content": "Nice to meet you."},
-            ],
-        ]
-        tokenized_chats = [tokenizer.apply_chat_template(test_chat) for test_chat in test_chats]
-        # fmt: off
-        expected_tokens = [
-            [5, 36, 99, 59, 60, 41, 58, 60, 71, 55, 46, 71, 60, 61, 58, 54, 71, 60, 55, 51, 45, 54, 99, 38, 36, 99, 59, 65, 59, 60, 45, 53, 71, 60, 55, 51, 45, 54, 99, 38, 65, 243, 394, 204, 336, 84, 88, 887, 374, 216, 74, 286, 22, 8, 36, 99, 59, 60, 41, 58, 60, 71, 55, 46, 71, 60, 61, 58, 54, 71, 60, 55, 51, 45, 54, 99, 38, 36, 99, 61, 59, 45, 58, 71, 60, 55, 51, 45, 54, 99, 38, 48, 420, 87, 9, 8],
-            [5, 36, 99, 59, 60, 41, 58, 60, 71, 55, 46, 71, 60, 61, 58, 54, 71, 60, 55, 51, 45, 54, 99, 38, 36, 99, 59, 65,
-            59, 60, 45, 53, 71, 60, 55, 51, 45, 54, 99, 38, 65, 243, 394, 204, 336, 84, 88, 887, 374, 216, 74, 286, 22, 8,
-            36, 99, 59, 60, 41, 58, 60, 71, 55, 46, 71, 60, 61, 58, 54, 71, 60, 55, 51, 45, 54, 99, 38, 36, 99, 61, 59,
-            45, 58, 71, 60, 55, 51, 45, 54, 99, 38, 48, 420, 87, 9, 8, 36, 99, 59, 60, 41, 58, 60, 71, 55, 46, 71, 60, 61,
-            58, 54, 71, 60, 55, 51, 45, 54, 99, 38, 36, 99, 43, 48, 41, 60, 42, 55, 60, 71, 60, 55, 51, 45, 54, 99, 38,
-            54, 567, 235, 693, 276, 411, 243, 22, 8]
-        ]
-        # fmt: on
-        for tokenized_chat, expected_tokens in zip(tokenized_chats, expected_tokens):
-            self.assertListEqual(tokenized_chat, expected_tokens)
-
     @require_jinja
     def test_tokenization_for_tool_use(self):
         tokenizer = self.get_rust_tokenizer()
diff --git a/tests/models/gemma/test_tokenization_gemma.py b/tests/models/gemma/test_tokenization_gemma.py
index 913f7546e84a..0bae68e4b0e3 100644
--- a/tests/models/gemma/test_tokenization_gemma.py
+++ b/tests/models/gemma/test_tokenization_gemma.py
@@ -27,7 +27,6 @@ from transformers.testing_utils import (
     get_tests_dir,
     nested_simplify,
-    require_jinja,
     require_read_token,
     require_sentencepiece,
     require_tokenizers,
@@ -428,25 +427,6 @@ def test_some_edge_cases(self):
         # a dummy prefix space is not added by the sp_model as it was de-activated
         self.assertEqual(tokens, tokenizer.sp_model.encode("▁▁", out_type=str))

-    @require_jinja
-    def test_tokenization_for_chat(self):
-        tokenizer = GemmaTokenizer.from_pretrained("hf-internal-testing/dummy-gemma")
-
-        test_chats = [
-            [{"role": "user", "content": "Hello!"}],
-            [
-                {"role": "user", "content": "Hello!"},
-                {"role": "assistant", "content": "Nice to meet you."},
-            ],
-            [{"role": "user", "content": "Hello!"}],
-        ]
-        # Matt: The third test case tests the default system message, but if this is ever changed in the
-        # class/repo code then that test will fail, and the case will need to be updated.
-        tokenized_chats = [tokenizer.apply_chat_template(test_chat) for test_chat in test_chats]
-        expected_tokens = [[235322, 235371, 571, 235298, 2997, 73786, 1645, 108, 4521, 149907, 235371, 571, 235298, 615, 73786, 108], [235322, 235371, 571, 235298, 2997, 73786, 1645, 108, 4521, 149907, 235371, 571, 235298, 615, 73786, 108, 235322, 235371, 571, 235298, 2997, 73786, 105776, 108, 7731, 577, 4664, 692, 35606, 235371, 571, 235298, 615, 73786, 108], [235322, 235371, 571, 235298, 2997, 73786, 1645, 108, 4521, 149907, 235371, 571, 235298, 615, 73786, 108]]  # fmt: skip
-        for tokenized_chat, expected_tokens in zip(tokenized_chats, expected_tokens):
-            self.assertListEqual(tokenized_chat, expected_tokens)
-
     def test_save_fast_load_slow(self):
         # Ensure that we can save a fast tokenizer and load it as a slow tokenizer
         slow_tokenizer = self.tokenizer
diff --git a/tests/models/gpt2/test_tokenization_gpt2.py b/tests/models/gpt2/test_tokenization_gpt2.py
index c69e0b521086..be6b90bc4637 100644
--- a/tests/models/gpt2/test_tokenization_gpt2.py
+++ b/tests/models/gpt2/test_tokenization_gpt2.py
@@ -19,7 +19,7 @@
 from transformers import AutoTokenizer, GPT2Tokenizer, GPT2TokenizerFast
 from transformers.models.gpt2.tokenization_gpt2 import VOCAB_FILES_NAMES
-from transformers.testing_utils import require_jinja, require_tiktoken, require_tokenizers
+from transformers.testing_utils import require_tiktoken, require_tokenizers

 from ...test_tokenization_common import TokenizerTesterMixin

@@ -281,28 +281,6 @@ def test_special_tokens_mask_input_pairs_and_bos_token(self):
         filtered_sequence = [x for x in filtered_sequence if x is not None]
         self.assertEqual(encoded_sequence, filtered_sequence)

-    @require_jinja
-    def test_tokenization_for_chat(self):
-        tokenizer = GPT2Tokenizer.from_pretrained(self.tmpdirname)
-        tokenizer.chat_template = "{% for message in messages %}{{ message.content }}{{ eos_token }}{% endfor %}"
-        test_chats = [
-            [{"role": "system", "content": "You are a helpful chatbot."}, {"role": "user", "content": "Hello!"}],
-            [
-                {"role": "system", "content": "You are a helpful chatbot."},
-                {"role": "user", "content": "Hello!"},
-                {"role": "assistant", "content": "Nice to meet you."},
-            ],
-            [{"role": "assistant", "content": "Nice to meet you."}, {"role": "user", "content": "Hello!"}],
-        ]
-        tokenized_chats = [tokenizer.apply_chat_template(test_chat) for test_chat in test_chats]
-        # fmt: off
-        expected_tokens = [[20, 1, 20, 10, 20, 4, 3, 10, 20, 10, 20, 3, 0, 20, 20, 20, 0, 10, 20, 20, 20, 6, 20, 1, 6, 20, 20, 20, 3, 0, 0, 1, 20, 20],
-                           [20, 1, 20, 10, 20, 4, 3, 10, 20, 10, 20, 3, 0, 20, 20, 20, 0, 10, 20, 20, 20, 6, 20, 1, 6, 20, 20, 20, 3, 0, 0, 1, 20, 20, 20, 7, 20, 3, 10, 6, 1, 10, 20, 3, 3, 6, 10, 20, 1, 20, 20, 20],
-                           [20, 7, 20, 3, 10, 6, 1, 10, 20, 3, 3, 6, 10, 20, 1, 20, 20, 20, 20, 3, 0, 0, 1, 20, 20]]
-        # fmt: on
-        for tokenized_chat, expected_tokens in zip(tokenized_chats, expected_tokens):
-            self.assertListEqual(tokenized_chat, expected_tokens)
-
     @require_tiktoken
     def test_tokenization_tiktoken(self):
         from tiktoken import encoding_name_for_model
diff --git a/tests/models/gpt_sw3/test_tokenization_gpt_sw3.py b/tests/models/gpt_sw3/test_tokenization_gpt_sw3.py
index 4a1a3292c5bf..c77eaecede2a 100644
--- a/tests/models/gpt_sw3/test_tokenization_gpt_sw3.py
+++ b/tests/models/gpt_sw3/test_tokenization_gpt_sw3.py
@@ -15,7 +15,7 @@
 import unittest

 from transformers import GPTSw3Tokenizer
-from transformers.testing_utils import get_tests_dir, require_jinja, require_sentencepiece, require_tokenizers, slow
+from transformers.testing_utils import get_tests_dir, require_sentencepiece, require_tokenizers, slow

 from ...test_tokenization_common import TokenizerTesterMixin

@@ -127,36 +127,3 @@ def test_tokenizer_integration(self):
             model_name="AI-Sweden-Models/gpt-sw3-126m",
             sequences=sequences,
         )
-
-    @require_jinja
-    def test_tokenization_for_chat(self):
-        tokenizer = GPTSw3Tokenizer(SAMPLE_VOCAB)
-        tokenizer.chat_template = (
-            "{{ eos_token }}{{ bos_token }}"
-            "{% for message in messages %}"
-            "{% if message['role'] == 'user' %}{{ 'User: ' + message['content']}}"
-            "{% else %}{{ 'Bot: ' + message['content']}}{% endif %}"
-            "{{ message['text'] }}{{ bos_token }}"
-            "{% endfor %}"
-            "Bot:"
-        )
-        # This is in English, but it's just here to make sure the chat control tokens are being added properly
-        test_chats = [
-            [{"role": "system", "content": "You are a helpful chatbot."}, {"role": "user", "content": "Hello!"}],
-            [
-                {"role": "system", "content": "You are a helpful chatbot."},
-                {"role": "user", "content": "Hello!"},
-                {"role": "assistant", "content": "Nice to meet you."},
-            ],
-            [{"role": "assistant", "content": "Nice to meet you."}, {"role": "user", "content": "Hello!"}],
-        ]
-        tokenized_chats = [tokenizer.apply_chat_template(test_chat) for test_chat in test_chats]
-        # fmt: off
-        expected_tokens = [
-            [2000, 1, 575, 541, 419, 530, 339, 265, 878, 708, 727, 275, 347, 541, 260, 1, 968, 263, 314, 419, 366, 354, 294, 360, 1, 575, 541, 419],
-            [2000, 1, 575, 541, 419, 530, 339, 265, 878, 708, 727, 275, 347, 541, 260, 1, 968, 263, 314, 419, 366, 354, 294, 360, 1, 575, 541, 419, 984, 429, 281, 264, 1261, 291, 260, 1, 575, 541, 419],
-            [2000, 1, 575, 541, 419, 984, 429, 281, 264, 1261, 291, 260, 1, 968, 263, 314, 419, 366, 354, 294, 360, 1, 575, 541, 419]
-        ]
-        # fmt: on
-        for tokenized_chat, expected_tokens in zip(tokenized_chats, expected_tokens):
-            self.assertListEqual(tokenized_chat, expected_tokens)
diff --git a/tests/models/llama/test_tokenization_llama.py b/tests/models/llama/test_tokenization_llama.py
index 58eb1f4e86e8..d69965b1b268 100644
--- a/tests/models/llama/test_tokenization_llama.py
+++ b/tests/models/llama/test_tokenization_llama.py
@@ -32,7 +32,6 @@ from transformers.testing_utils import (
     get_tests_dir,
     nested_simplify,
-    require_jinja,
     require_read_token,
     require_sentencepiece,
     require_tiktoken,
@@ -702,32 +701,6 @@ def test_fast_post_processor(self):
         with self.assertRaises(ValueError):
             tokenizer = LlamaTokenizerFast(SAMPLE_VOCAB, eos_token=None, add_bos_token=True, add_eos_token=True)

-    @require_jinja
-    def test_tokenization_for_chat(self):
-        tokenizer = LlamaTokenizer.from_pretrained("huggyllama/llama-7b", legacy=False)
-
-        test_chats = [
-            [{"role": "system", "content": "You are a helpful chatbot."}, {"role": "user", "content": "Hello!"}],
-            [
-                {"role": "system", "content": "You are a helpful chatbot."},
-                {"role": "user", "content": "Hello!"},
-                {"role": "assistant", "content": "Nice to meet you."},
-            ],
-            [{"role": "user", "content": "Hello!"}],
-        ]
-        # Matt: The third test case tests the default system message, but if this is ever changed in the
-        # class/repo code then that test will fail, and the case will need to be updated.
-        tokenized_chats = [tokenizer.apply_chat_template(test_chat) for test_chat in test_chats]
-        # fmt: off
-        expected_tokens = [
-            [1, 29961, 25580, 29962, 3532, 14816, 29903, 6778, 13, 3492, 526, 263, 8444, 13563, 7451, 29889, 13, 29966, 829, 14816, 29903, 6778, 13, 13, 10994, 29991, 518, 29914, 25580, 29962],
-            [1, 29961, 25580, 29962, 3532, 14816, 29903, 6778, 13, 3492, 526, 263, 8444, 13563, 7451, 29889, 13, 29966, 829, 14816, 29903, 6778, 13, 13, 10994, 29991, 518, 29914, 25580, 29962, 20103, 304, 5870, 366, 29889, 29871, 2],
-            [1, 29961, 25580, 29962, 15043, 29991, 518, 29914, 25580, 29962]
-        ]
-        # fmt: on
-        for tokenized_chat, expected_tokens in zip(tokenized_chats, expected_tokens):
-            self.assertListEqual(tokenized_chat, expected_tokens)
-

 @require_sentencepiece
 @require_tokenizers
diff --git a/tests/test_tokenization_mistral_common.py b/tests/test_tokenization_mistral_common.py
index 82dba87f7d7e..f33501cdc432 100644
--- a/tests/test_tokenization_mistral_common.py
+++ b/tests/test_tokenization_mistral_common.py
@@ -799,7 +799,9 @@ def test_apply_chat_template_basic(self):

         # Test 2:
         # without tokenize
-        self.assertEqual(self.tokenizer.apply_chat_template(conversation, tokenize=True), expected_tokenized.tokens)
+        self.assertEqual(
+            self.tokenizer.apply_chat_template(conversation, tokenize=True).input_ids, expected_tokenized.tokens
+        )

         with self.assertRaises(
             ValueError, msg="Kwargs [unk_args] are not supported by `MistralCommonTokenizer.apply_chat_template`."
@@ -824,7 +826,7 @@ def test_apply_chat_template_continue_final_message(self):
             expected_tokenized.text,
         )
         self.assertEqual(
-            self.tokenizer.apply_chat_template(conversation, tokenize=True, continue_final_message=True),
+            self.tokenizer.apply_chat_template(conversation, tokenize=True, continue_final_message=True).input_ids,
             expected_tokenized.tokens,
         )

@@ -846,7 +848,7 @@ def test_apply_chat_template_with_add_generation_prompt(self):
             token_outputs = self.tokenizer.apply_chat_template(
                 conversation, tokenize=True, add_generation_prompt=add_generation_prompt
             )
-            self.assertEqual(token_outputs, expected_tokenized.tokens)
+            self.assertEqual(token_outputs.input_ids, expected_tokenized.tokens)

         # Test 2:
         # with continue_final_message
@@ -958,18 +960,16 @@ def test_apply_chat_template_with_image(self):
             },
         ]

-        output = self.tokenizer.apply_chat_template(conversation, tokenize=True)
+        output = self.tokenizer.apply_chat_template(conversation).input_ids
         self.assertEqual(output, expected_tokenized.tokens)

-        output_dict = self.tokenizer.apply_chat_template(conversation, tokenize=True, return_dict=True)
+        output_dict = self.tokenizer.apply_chat_template(conversation, tokenize=True)
         self.assertEqual(output_dict["input_ids"], expected_tokenized.tokens)
         self.assertEqual(len(output_dict["pixel_values"]), len(expected_tokenized.images))
         for o, e in zip(output_dict["pixel_values"], expected_tokenized.images):
             self.assertTrue(np.allclose(o, e))

-        output_dict = self.tokenizer.apply_chat_template(
-            conversation, tokenize=True, return_dict=True, return_tensors="pt"
-        )
+        output_dict = self.tokenizer.apply_chat_template(conversation, tokenize=True, return_tensors="pt")
         self.assertEqual(output_dict["input_ids"].tolist()[0], expected_tokenized.tokens)
         expected_images_pt_tensor = torch.from_numpy(np.stack(expected_tokenized.images))
         self.assertTrue(torch.allclose(output_dict["pixel_values"], expected_images_pt_tensor))
@@ -1013,7 +1013,7 @@ def test_apply_chat_template_with_audio(self):
             },
         ]

-        output = self.tokenizer_audio.apply_chat_template(conversation, tokenize=True)
+        output = self.tokenizer_audio.apply_chat_template(conversation, tokenize=True).input_ids
         self.assertEqual(output, expected_tokenized.tokens)

         output_dict = self.tokenizer_audio.apply_chat_template(conversation, tokenize=True, return_dict=True)
@@ -1041,14 +1041,14 @@ def test_apply_chat_template_with_truncation(self):
         # Test 1:
         # with truncation
         self.assertEqual(
-            self.tokenizer.apply_chat_template(conversation, tokenize=True, truncation=True, max_length=20),
+            self.tokenizer.apply_chat_template(conversation, tokenize=True, truncation=True, max_length=20).input_ids,
             expected_tokenized.tokens[:20],
         )

         # Test 2:
         # without truncation
         self.assertEqual(
-            self.tokenizer.apply_chat_template(conversation, tokenize=True, truncation=False, max_length=20),
+            self.tokenizer.apply_chat_template(conversation, tokenize=True, truncation=False, max_length=20).input_ids,
             expected_tokenized.tokens,
         )

@@ -1130,7 +1130,7 @@ def test_batch_apply_chat_template(self):
         ]

         text_outputs = self.tokenizer.apply_chat_template(conversations, tools=tools, tokenize=False)
-        token_outputs = self.tokenizer.apply_chat_template(conversations, tools=tools, tokenize=True)
+        token_outputs = self.tokenizer.apply_chat_template(conversations, tools=tools, tokenize=True).input_ids

         self.assertEqual(len(text_outputs), len(token_outputs))
         self.assertEqual(len(text_outputs), len(expected_tokenized))
@@ -1202,7 +1202,7 @@ def test_batch_apply_chat_template_images(self):
             ChatCompletionRequest.from_openai(ref_conversation)
         )

-        output = self.tokenizer.apply_chat_template(conversations, tokenize=True)
+        output = self.tokenizer.apply_chat_template(conversations, tokenize=True).input_ids
         self.assertEqual(output, [expected_tokenized.tokens] * 3)

         output = self.tokenizer.apply_chat_template(conversations, tokenize=True, return_dict=True)
@@ -1248,7 +1248,9 @@ def test_batch_apply_chat_template_with_continue_final_message(self):
             for conversation in conversations
         ]

-        token_outputs = self.tokenizer.apply_chat_template(conversations, tokenize=True, continue_final_message=True)
+        token_outputs = self.tokenizer.apply_chat_template(
+            conversations, tokenize=True, continue_final_message=True
+        ).input_ids

         for output, expected in zip(token_outputs, expected_tokenized):
             self.assertEqual(output, expected.tokens)
@@ -1297,7 +1299,7 @@ def test_batch_apply_chat_template_with_add_generation_prompt(self):
             ]
             token_outputs = self.tokenizer.apply_chat_template(
                 conversations, tokenize=True, add_generation_prompt=add_generation_prompt
-            )
+            ).input_ids
             for output, expected in zip(token_outputs, expected_tokenized):
                 self.assertEqual(output, expected.tokens)

@@ -1331,7 +1333,7 @@ def test_batch_apply_chat_template_with_truncation(
         # with truncation
         token_outputs = self.tokenizer.apply_chat_template(
             self.fixture_conversations, tokenize=True, truncation=True, max_length=20
-        )
+        ).input_ids
         for output, expected in zip(token_outputs, self.tokenized_fixture_conversations):
             self.assertEqual(output, expected.tokens[:20])

@@ -1340,7 +1342,7 @@ def test_batch_apply_chat_template_with_truncation(
         # without truncation
         token_outputs = self.tokenizer.apply_chat_template(
             self.fixture_conversations, tokenize=True, truncation=False, max_length=20
-        )
+        ).input_ids
         self.assertEqual(len(token_outputs), len(self.tokenized_fixture_conversations))
         for output, expected in zip(token_outputs, self.tokenized_fixture_conversations):
             self.assertEqual(output, expected.tokens)
@@ -1358,7 +1360,9 @@ def test_batch_apply_chat_template_with_padding(
         for padding in [True, "max_length", PaddingStrategy.LONGEST, PaddingStrategy.MAX_LENGTH]:
             if padding == PaddingStrategy.MAX_LENGTH:
                 # No padding if no max length is provided
-                token_outputs = self.tokenizer.apply_chat_template(self.fixture_conversations, padding=padding)
+                token_outputs = self.tokenizer.apply_chat_template(
+                    self.fixture_conversations, padding=padding, return_dict=False
+                )
                 self.assertEqual(len(token_outputs), len(self.tokenized_fixture_conversations))
                 for output, expected in zip(token_outputs, self.tokenized_fixture_conversations):
                     self.assertEqual(output, expected.tokens)
@@ -1366,7 +1370,7 @@
             max_length = 20 if padding == PaddingStrategy.MAX_LENGTH else None
             token_outputs = self.tokenizer.apply_chat_template(
-                self.fixture_conversations, tokenize=True, padding=padding, max_length=max_length
+                self.fixture_conversations, tokenize=True, padding=padding, max_length=max_length, return_dict=False
             )

             if padding != PaddingStrategy.MAX_LENGTH:
@@ -1390,7 +1394,7 @@ def test_batch_apply_chat_template_with_padding(

         for padding in [False, "do_not_pad", PaddingStrategy.DO_NOT_PAD]:
             token_outputs = self.tokenizer.apply_chat_template(
-                self.fixture_conversations, tokenize=True, padding=padding
+                self.fixture_conversations, tokenize=True, padding=padding, return_dict=False
             )
             self.assertEqual(len(token_outputs), len(self.tokenized_fixture_conversations))
             for output, expected in zip(token_outputs, self.tokenized_fixture_conversations):
@@ -1402,7 +1406,12 @@ def test_batch_apply_chat_template_with_padding_and_truncation(
         max_length = 20
         for padding in [True, "max_length", PaddingStrategy.LONGEST, PaddingStrategy.MAX_LENGTH]:
             token_outputs = self.tokenizer.apply_chat_template(
-                self.fixture_conversations, tokenize=True, truncation=True, padding=padding, max_length=max_length
+                self.fixture_conversations,
+                tokenize=True,
+                truncation=True,
+                padding=padding,
+                max_length=max_length,
+                return_dict=False,
             )
             self.assertEqual(len(token_outputs), len(self.tokenized_fixture_conversations))
             for output, expected in zip(token_outputs, self.tokenized_fixture_conversations):
@@ -1411,7 +1420,12 @@ def test_batch_apply_chat_template_with_padding_and_truncation(
             )
         for padding in [False, "do_not_pad", PaddingStrategy.DO_NOT_PAD]:
             token_outputs = self.tokenizer.apply_chat_template(
-                self.fixture_conversations, tokenize=True, truncation=True, padding=padding, max_length=max_length
+                self.fixture_conversations,
+                tokenize=True,
+                truncation=True,
+                padding=padding,
+                max_length=max_length,
+                return_dict=False,
             )
             self.assertEqual(len(token_outputs), len(self.tokenized_fixture_conversations))
             for output, expected in zip(token_outputs, self.tokenized_fixture_conversations):
@@ -1421,7 +1435,7 @@ def test_batch_apply_chat_template_return_tensors(self):
         # Test 1:
         # with tokenize
         token_outputs = self.tokenizer.apply_chat_template(
-            self.fixture_conversations, tokenize=True, return_tensors="pt", padding=True
+            self.fixture_conversations, tokenize=True, return_tensors="pt", padding=True, return_dict=False
        )
         self.assertIsInstance(token_outputs, torch.Tensor)
         self.assertEqual(
@@ -1432,7 +1446,7 @@ def test_batch_apply_chat_template_return_tensors(self):
         # Test 2:
         # without tokenize, should ignore return_tensors
         token_outputs = self.tokenizer.apply_chat_template(
-            self.fixture_conversations, tokenize=False, return_tensors="pt", padding=True
+            self.fixture_conversations, tokenize=False, return_tensors="pt", padding=True, return_dict=False
         )
         self.assertEqual(token_outputs, [t.text for t in self.tokenized_fixture_conversations])

diff --git a/tests/tokenization/test_tokenization_utils.py b/tests/tokenization/test_tokenization_utils.py
index 24aac3719812..6fd20a2cf473 100644
--- a/tests/tokenization/test_tokenization_utils.py
+++ b/tests/tokenization/test_tokenization_utils.py
@@ -323,7 +323,7 @@ def test_encode_message(self):
         ]

         # First, test the default case, where we encode the whole conversation at once
-        whole_conversation_tokens = tokenizer.apply_chat_template(conversation, tokenize=True)
+        whole_conversation_tokens = tokenizer.apply_chat_template(conversation, tokenize=True, return_dict=False)

         # Now, test the message-by-message encoding
         tokens = []
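Net effect of the patch: `apply_chat_template` now returns a `BatchEncoding` by default whenever its output is tokenized, and `tokenize=False` silently forces `return_dict=False` instead of raising. A minimal sketch of the resulting call patterns, assuming any chat-capable checkpoint (the model name below is only an illustrative placeholder, not something this diff touches):

```python
from transformers import AutoTokenizer

# Illustrative checkpoint only; any tokenizer with a chat template behaves the same.
tokenizer = AutoTokenizer.from_pretrained("HuggingFaceH4/zephyr-7b-beta")

chat = [
    {"role": "system", "content": "You are a helpful chatbot."},
    {"role": "user", "content": "Hello!"},
]

# New default: tokenize=True + return_dict=True, so a BatchEncoding comes back.
encoded = tokenizer.apply_chat_template(chat, add_generation_prompt=True)
print(encoded["input_ids"])  # token ids, alongside attention_mask etc.

# The old behavior (a plain list of token ids) is still available by opting out.
ids = tokenizer.apply_chat_template(chat, tokenize=True, return_dict=False)

# tokenize=False still returns the formatted string; return_dict is now
# ignored here rather than raising a ValueError as before.
text = tokenizer.apply_chat_template(chat, tokenize=False)
```

This is why the test changes above either append `.input_ids` to tokenized calls or pass `return_dict=False` explicitly, and why `encode_message_with_chat_template` now forwards `return_dict=False` so it keeps returning bare token lists.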