diff --git a/src/transformers/modeling_utils.py b/src/transformers/modeling_utils.py
index c1e045bd8451..7dfc2a68e381 100644
--- a/src/transformers/modeling_utils.py
+++ b/src/transformers/modeling_utils.py
@@ -4757,3 +4757,8 @@ def encode(self, input_values: torch.Tensor, *args, **kwargs):
     @abstractmethod
     def decode(self, audio_codes: torch.Tensor, *args, **kwargs):
         """Decode from discrete audio codebooks back to raw audio"""
+
+
+def empty_function(x: int):
+    # This is just here to trigger the full CI by modifying a core file, and should definitely be removed before merging
+    return x + 1
diff --git a/src/transformers/models/auto/tokenization_auto.py b/src/transformers/models/auto/tokenization_auto.py
index 259e04a56b1e..fcfcfdf8626e 100644
--- a/src/transformers/models/auto/tokenization_auto.py
+++ b/src/transformers/models/auto/tokenization_auto.py
@@ -693,9 +693,11 @@ def from_pretrained(
             if tokenizer_class is None and not tokenizer_class_candidate.endswith("Fast"):
                 tokenizer_class = tokenizer_class_from_name(tokenizer_class_candidate + "Fast")
             if tokenizer_class is not None and tokenizer_class.__name__ == "PythonBackend":
+                raise ValueError("Fallback happened here!!")
                 tokenizer_class = TokenizersBackend
             # Fallback to TokenizersBackend if the class wasn't found
             if tokenizer_class is None:
+                raise ValueError("Fallback happened here!!")
                 tokenizer_class = TokenizersBackend
 
             return tokenizer_class.from_pretrained(pretrained_model_name_or_path, *inputs, **kwargs)
diff --git a/src/transformers/tokenization_utils_base.py b/src/transformers/tokenization_utils_base.py
index 89433159b183..8a0c137c648e 100644
--- a/src/transformers/tokenization_utils_base.py
+++ b/src/transformers/tokenization_utils_base.py
@@ -1432,7 +1432,7 @@ def get_vocab(self) -> dict[str, int]:
         Returns:
             `dict[str, int]`: The vocabulary.
         """
-        raise NotImplementedError()
+        raise NotImplementedError("We got here!")
 
     def convert_tokens_to_ids(self, tokens: str | list[str]) -> int | list[int]:
         """
diff --git a/tests/models/idefics/test_processing_idefics.py b/tests/models/idefics/test_processing_idefics.py
index f49942a4a4d8..2288b45001fe 100644
--- a/tests/models/idefics/test_processing_idefics.py
+++ b/tests/models/idefics/test_processing_idefics.py
@@ -98,6 +98,8 @@ def test_save_load_pretrained_additional_features(self):
         )
 
         self.assertEqual(processor.tokenizer.get_vocab(), tokenizer_add_kwargs.get_vocab())
+        if not isinstance(processor.tokenizer, self._get_component_class_from_processor("tokenizer")):
+            raise ValueError("We got here!")
         self.assertIsInstance(processor.tokenizer, self._get_component_class_from_processor("tokenizer"))
 
         self.assertEqual(processor.image_processor.to_json_string(), image_processor_add_kwargs.to_json_string())