diff --git a/docs/source/en/internal/generation_utils.md b/docs/source/en/internal/generation_utils.md index 0221622c4080..936e4bfb95da 100644 --- a/docs/source/en/internal/generation_utils.md +++ b/docs/source/en/internal/generation_utils.md @@ -140,9 +140,6 @@ generation. [[autodoc]] ForcedEOSTokenLogitsProcessor - __call__ -[[autodoc]] ForceTokensLogitsProcessor - - __call__ - [[autodoc]] HammingDiversityLogitsProcessor - __call__ diff --git a/docs/source/ja/internal/generation_utils.md b/docs/source/ja/internal/generation_utils.md index 9e3ce7799543..1a5cc1dec079 100644 --- a/docs/source/ja/internal/generation_utils.md +++ b/docs/source/ja/internal/generation_utils.md @@ -139,9 +139,6 @@ generation_output[:2] [[autodoc]] ForcedEOSTokenLogitsProcessor - __call__ -[[autodoc]] ForceTokensLogitsProcessor - - __call__ - [[autodoc]] HammingDiversityLogitsProcessor - __call__ diff --git a/docs/source/zh/internal/generation_utils.md b/docs/source/zh/internal/generation_utils.md index 75f28c233ee0..084e2a29dc8c 100644 --- a/docs/source/zh/internal/generation_utils.md +++ b/docs/source/zh/internal/generation_utils.md @@ -133,9 +133,6 @@ generation_output[:2] [[autodoc]] ForcedEOSTokenLogitsProcessor - __call__ -[[autodoc]] ForceTokensLogitsProcessor - - __call__ - [[autodoc]] HammingDiversityLogitsProcessor - __call__ diff --git a/src/transformers/__init__.py b/src/transformers/__init__.py index 1d36e7f8c746..ced2b9997366 100755 --- a/src/transformers/__init__.py +++ b/src/transformers/__init__.py @@ -1276,7 +1276,6 @@ "ExponentialDecayLengthPenalty", "ForcedBOSTokenLogitsProcessor", "ForcedEOSTokenLogitsProcessor", - "ForceTokensLogitsProcessor", "GenerationMixin", "HammingDiversityLogitsProcessor", "InfNanRemoveLogitsProcessor", @@ -6059,7 +6058,6 @@ ExponentialDecayLengthPenalty, ForcedBOSTokenLogitsProcessor, ForcedEOSTokenLogitsProcessor, - ForceTokensLogitsProcessor, GenerationMixin, HammingDiversityLogitsProcessor, InfNanRemoveLogitsProcessor, diff --git 
a/src/transformers/commands/pt_to_tf.py b/src/transformers/commands/pt_to_tf.py index 4002b5e0eb85..ad0dbd14e15b 100644 --- a/src/transformers/commands/pt_to_tf.py +++ b/src/transformers/commands/pt_to_tf.py @@ -12,45 +12,13 @@ # See the License for the specific language governing permissions and # limitations under the License. -import inspect import os from argparse import ArgumentParser, Namespace -from importlib import import_module -import huggingface_hub -import numpy as np -from packaging import version - -from .. import ( - FEATURE_EXTRACTOR_MAPPING, - IMAGE_PROCESSOR_MAPPING, - PROCESSOR_MAPPING, - TOKENIZER_MAPPING, - AutoConfig, - AutoFeatureExtractor, - AutoImageProcessor, - AutoProcessor, - AutoTokenizer, - is_datasets_available, - is_tf_available, - is_torch_available, -) -from ..utils import TF2_WEIGHTS_INDEX_NAME, TF2_WEIGHTS_NAME, logging +from ..utils import logging from . import BaseTransformersCLICommand -if is_tf_available(): - import tensorflow as tf - - tf.config.experimental.enable_tensor_float_32_execution(False) - -if is_torch_available(): - import torch - -if is_datasets_available(): - from datasets import load_dataset - - MAX_ERROR = 5e-5 # larger error tolerance than in our internal tests, to avoid flaky user-facing errors @@ -136,44 +104,6 @@ def register_subcommand(parser: ArgumentParser): ) train_parser.set_defaults(func=convert_command_factory) - @staticmethod - def find_pt_tf_differences(pt_outputs, tf_outputs): - """ - Compares the TensorFlow and PyTorch outputs, returning a dictionary with all tensor differences. - """ - # 1. All output attributes must be the same - pt_out_attrs = set(pt_outputs.keys()) - tf_out_attrs = set(tf_outputs.keys()) - if pt_out_attrs != tf_out_attrs: - raise ValueError( - f"The model outputs have different attributes, aborting. (Pytorch: {pt_out_attrs}, TensorFlow:" - f" {tf_out_attrs})" - ) - - # 2. 
For each output attribute, computes the difference - def _find_pt_tf_differences(pt_out, tf_out, differences, attr_name=""): - # If the current attribute is a tensor, it is a leaf and we make the comparison. Otherwise, we will dig in - # recursivelly, keeping the name of the attribute. - if isinstance(pt_out, torch.Tensor): - tensor_difference = np.max(np.abs(pt_out.numpy() - tf_out.numpy())) - differences[attr_name] = tensor_difference - else: - root_name = attr_name - for i, pt_item in enumerate(pt_out): - # If it is a named attribute, we keep the name. Otherwise, just its index. - if isinstance(pt_item, str): - branch_name = root_name + pt_item - tf_item = tf_out[pt_item] - pt_item = pt_out[pt_item] - else: - branch_name = root_name + f"[{i}]" - tf_item = tf_out[i] - differences = _find_pt_tf_differences(pt_item, tf_item, differences, branch_name) - - return differences - - return _find_pt_tf_differences(pt_outputs, tf_outputs, {}) - def __init__( self, model_name: str, @@ -196,237 +126,12 @@ def __init__( self._extra_commit_description = extra_commit_description self._override_model_class = override_model_class - def get_inputs(self, pt_model, tf_dummy_inputs, config): - """ - Returns the right inputs for the model, based on its signature. 
- """ - - def _get_audio_input(): - ds = load_dataset("hf-internal-testing/librispeech_asr_dummy", "clean", split="validation") - speech_samples = ds.sort("id").select(range(2))[:2]["audio"] - raw_samples = [x["array"] for x in speech_samples] - return raw_samples - - model_config_class = type(pt_model.config) - if model_config_class in PROCESSOR_MAPPING: - processor = AutoProcessor.from_pretrained(self._local_dir) - if model_config_class in TOKENIZER_MAPPING and processor.tokenizer.pad_token is None: - processor.tokenizer.pad_token = processor.tokenizer.eos_token - elif model_config_class in IMAGE_PROCESSOR_MAPPING: - processor = AutoImageProcessor.from_pretrained(self._local_dir) - elif model_config_class in FEATURE_EXTRACTOR_MAPPING: - processor = AutoFeatureExtractor.from_pretrained(self._local_dir) - elif model_config_class in TOKENIZER_MAPPING: - processor = AutoTokenizer.from_pretrained(self._local_dir) - if processor.pad_token is None: - processor.pad_token = processor.eos_token - else: - raise ValueError(f"Unknown data processing type (model config type: {model_config_class})") - - model_forward_signature = set(inspect.signature(pt_model.forward).parameters.keys()) - processor_inputs = {} - if "input_ids" in model_forward_signature: - processor_inputs.update( - { - "text": ["Hi there!", "I am a batch with more than one row and different input lengths."], - "padding": True, - "truncation": True, - } - ) - if "pixel_values" in model_forward_signature: - sample_images = load_dataset("uoft-cs/cifar10", "plain_text", split="test")[:2]["img"] # no-script - processor_inputs.update({"images": sample_images}) - if "input_features" in model_forward_signature: - feature_extractor_signature = inspect.signature(processor.feature_extractor).parameters - # Pad to the largest input length by default but take feature extractor default - # padding value if it exists e.g. 
"max_length" and is not False or None - if "padding" in feature_extractor_signature: - default_strategy = feature_extractor_signature["padding"].default - if default_strategy is not False and default_strategy is not None: - padding_strategy = default_strategy - else: - padding_strategy = True - else: - padding_strategy = True - processor_inputs.update({"audio": _get_audio_input(), "padding": padding_strategy}) - if "input_values" in model_forward_signature: # Wav2Vec2 audio input - processor_inputs.update({"audio": _get_audio_input(), "padding": True}) - pt_input = processor(**processor_inputs, return_tensors="pt") - tf_input = processor(**processor_inputs, return_tensors="tf") - - # Extra input requirements, in addition to the input modality - if ( - config.is_encoder_decoder - or (hasattr(pt_model, "encoder") and hasattr(pt_model, "decoder")) - or "decoder_input_ids" in tf_dummy_inputs - ): - decoder_input_ids = np.asarray([[1], [1]], dtype=int) * (pt_model.config.decoder_start_token_id or 0) - pt_input.update({"decoder_input_ids": torch.tensor(decoder_input_ids)}) - tf_input.update({"decoder_input_ids": tf.convert_to_tensor(decoder_input_ids)}) - - return pt_input, tf_input - def run(self): - self._logger.warning( - "\n\nConverting PyTorch weights to TensorFlow is deprecated and will be removed in v4.43. " + # TODO (joao): delete file in v4.47 + raise NotImplementedError( + "\n\nConverting PyTorch weights to TensorFlow weights was removed in v4.43. " "Instead, we recommend that you convert PyTorch weights to Safetensors, an improved " "format that can be loaded by any framework, including TensorFlow. For more information, " "please see the Safetensors conversion guide: " "https://huggingface.co/docs/safetensors/en/convert-weights\n\n" ) - # hub version 0.9.0 introduced the possibility of programmatically opening PRs with normal write tokens. 
- if version.parse(huggingface_hub.__version__) < version.parse("0.9.0"): - raise ImportError( - "The huggingface_hub version must be >= 0.9.0 to use this command. Please update your huggingface_hub" - " installation." - ) - else: - from huggingface_hub import Repository, create_commit - from huggingface_hub._commit_api import CommitOperationAdd - - # Fetch remote data - repo = Repository(local_dir=self._local_dir, clone_from=self._model_name) - - # Load config and get the appropriate architecture -- the latter is needed to convert the head's weights - config = AutoConfig.from_pretrained(self._local_dir) - architectures = config.architectures - if self._override_model_class is not None: - if self._override_model_class.startswith("TF"): - architectures = [self._override_model_class[2:]] - else: - architectures = [self._override_model_class] - try: - pt_class = getattr(import_module("transformers"), architectures[0]) - except AttributeError: - raise ValueError(f"Model class {self._override_model_class} not found in transformers.") - try: - tf_class = getattr(import_module("transformers"), "TF" + architectures[0]) - except AttributeError: - raise ValueError(f"TF model class TF{self._override_model_class} not found in transformers.") - elif architectures is None: # No architecture defined -- use auto classes - pt_class = getattr(import_module("transformers"), "AutoModel") - tf_class = getattr(import_module("transformers"), "TFAutoModel") - self._logger.warning("No detected architecture, using AutoModel/TFAutoModel") - else: # Architecture defined -- use it - if len(architectures) > 1: - raise ValueError(f"More than one architecture was found, aborting. 
(architectures = {architectures})") - self._logger.warning(f"Detected architecture: {architectures[0]}") - pt_class = getattr(import_module("transformers"), architectures[0]) - try: - tf_class = getattr(import_module("transformers"), "TF" + architectures[0]) - except AttributeError: - raise AttributeError(f"The TensorFlow equivalent of {architectures[0]} doesn't exist in transformers.") - - # Check the TF dummy inputs to see what keys we need in the forward pass - tf_from_pt_model = tf_class.from_config(config) - tf_dummy_inputs = tf_from_pt_model.dummy_inputs - - del tf_from_pt_model # Try to keep only one model in memory at a time - - # Load the model and get some basic inputs - pt_model = pt_class.from_pretrained(self._local_dir) - pt_model.eval() - - pt_input, tf_input = self.get_inputs(pt_model, tf_dummy_inputs, config) - - with torch.no_grad(): - pt_outputs = pt_model(**pt_input, output_hidden_states=True) - del pt_model # will no longer be used, and may have a large memory footprint - - tf_from_pt_model = tf_class.from_pretrained(self._local_dir, from_pt=True) - tf_from_pt_outputs = tf_from_pt_model(**tf_input, output_hidden_states=True, training=False) - - # Confirms that cross loading PT weights into TF worked. - crossload_differences = self.find_pt_tf_differences(pt_outputs, tf_from_pt_outputs) - output_differences = {k: v for k, v in crossload_differences.items() if "hidden" not in k} - hidden_differences = {k: v for k, v in crossload_differences.items() if "hidden" in k} - if len(output_differences) == 0 and architectures is not None: - raise ValueError( - f"Something went wrong -- the config file has architectures ({architectures}), but no model head" - " output was found. 
All outputs start with 'hidden'" - ) - max_crossload_output_diff = max(output_differences.values()) if output_differences else 0.0 - max_crossload_hidden_diff = max(hidden_differences.values()) - if max_crossload_output_diff > self._max_error or max_crossload_hidden_diff > self._max_error: - raise ValueError( - "The cross-loaded TensorFlow model has different outputs, something went wrong!\n" - + f"\nList of maximum output differences above the threshold ({self._max_error}):\n" - + "\n".join([f"{k}: {v:.3e}" for k, v in output_differences.items() if v > self._max_error]) - + f"\n\nList of maximum hidden layer differences above the threshold ({self._max_error}):\n" - + "\n".join([f"{k}: {v:.3e}" for k, v in hidden_differences.items() if v > self._max_error]) - ) - - # Save the weights in a TF format (if needed) and confirms that the results are still good - tf_weights_path = os.path.join(self._local_dir, TF2_WEIGHTS_NAME) - tf_weights_index_path = os.path.join(self._local_dir, TF2_WEIGHTS_INDEX_NAME) - if (not os.path.exists(tf_weights_path) and not os.path.exists(tf_weights_index_path)) or self._new_weights: - tf_from_pt_model.save_pretrained(self._local_dir) - del tf_from_pt_model # will no longer be used, and may have a large memory footprint - - tf_model = tf_class.from_pretrained(self._local_dir) - tf_outputs = tf_model(**tf_input, output_hidden_states=True) - - conversion_differences = self.find_pt_tf_differences(pt_outputs, tf_outputs) - output_differences = {k: v for k, v in conversion_differences.items() if "hidden" not in k} - hidden_differences = {k: v for k, v in conversion_differences.items() if "hidden" in k} - if len(output_differences) == 0 and architectures is not None: - raise ValueError( - f"Something went wrong -- the config file has architectures ({architectures}), but no model head" - " output was found. 
All outputs start with 'hidden'" - ) - max_conversion_output_diff = max(output_differences.values()) if output_differences else 0.0 - max_conversion_hidden_diff = max(hidden_differences.values()) - if max_conversion_output_diff > self._max_error or max_conversion_hidden_diff > self._max_error: - raise ValueError( - "The converted TensorFlow model has different outputs, something went wrong!\n" - + f"\nList of maximum output differences above the threshold ({self._max_error}):\n" - + "\n".join([f"{k}: {v:.3e}" for k, v in output_differences.items() if v > self._max_error]) - + f"\n\nList of maximum hidden layer differences above the threshold ({self._max_error}):\n" - + "\n".join([f"{k}: {v:.3e}" for k, v in hidden_differences.items() if v > self._max_error]) - ) - - commit_message = "Update TF weights" if self._new_weights else "Add TF weights" - if self._push: - repo.git_add(auto_lfs_track=True) - repo.git_commit(commit_message) - repo.git_push(blocking=True) # this prints a progress bar with the upload - self._logger.warning(f"TF weights pushed into {self._model_name}") - elif not self._no_pr: - self._logger.warning("Uploading the weights into a new PR...") - commit_descrition = ( - "Model converted by the [`transformers`' `pt_to_tf`" - " CLI](https://github.com/huggingface/transformers/blob/main/src/transformers/commands/pt_to_tf.py). " - "All converted model outputs and hidden layers were validated against its PyTorch counterpart.\n\n" - f"Maximum crossload output difference={max_crossload_output_diff:.3e}; " - f"Maximum crossload hidden layer difference={max_crossload_hidden_diff:.3e};\n" - f"Maximum conversion output difference={max_conversion_output_diff:.3e}; " - f"Maximum conversion hidden layer difference={max_conversion_hidden_diff:.3e};\n" - ) - if self._max_error > MAX_ERROR: - commit_descrition += ( - f"\n\nCAUTION: The maximum admissible error was manually increased to {self._max_error}!" 
- ) - if self._extra_commit_description: - commit_descrition += "\n\n" + self._extra_commit_description - - # sharded model -> adds all related files (index and .h5 shards) - if os.path.exists(tf_weights_index_path): - operations = [ - CommitOperationAdd(path_in_repo=TF2_WEIGHTS_INDEX_NAME, path_or_fileobj=tf_weights_index_path) - ] - for shard_path in tf.io.gfile.glob(self._local_dir + "/tf_model-*.h5"): - operations += [ - CommitOperationAdd(path_in_repo=os.path.basename(shard_path), path_or_fileobj=shard_path) - ] - else: - operations = [CommitOperationAdd(path_in_repo=TF2_WEIGHTS_NAME, path_or_fileobj=tf_weights_path)] - - hub_pr_url = create_commit( - repo_id=self._model_name, - operations=operations, - commit_message=commit_message, - commit_description=commit_descrition, - repo_type="model", - create_pr=True, - ).pr_url - self._logger.warning(f"PR open in {hub_pr_url}") diff --git a/src/transformers/configuration_utils.py b/src/transformers/configuration_utils.py index 2f84bc29aee2..c6e3d90b9f0c 100755 --- a/src/transformers/configuration_utils.py +++ b/src/transformers/configuration_utils.py @@ -81,6 +81,15 @@ class PretrainedConfig(PushToHubMixin): model. - **num_hidden_layers** (`int`) -- The number of blocks in the model. + + + Setting parameters for sequence generation in the model config is deprecated. For backward compatibility, loading + some of them will still be possible, but attempting to overwrite them will throw an exception -- you should set + them in a [`~transformers.GenerationConfig`]. Check the documentation of [`~transformers.GenerationConfig`] for more + information about the individual parameters. + + + Arg: name_or_path (`str`, *optional*, defaults to `""`): Store the string that was passed to [`PreTrainedModel.from_pretrained`] or @@ -117,77 +126,6 @@ class PretrainedConfig(PushToHubMixin): sequence_length embeddings at a time. 
For more information on feed forward chunking, see [How does Feed Forward Chunking work?](../glossary.html#feed-forward-chunking). - > Parameters for sequence generation - - max_length (`int`, *optional*, defaults to 20): - Maximum length that will be used by default in the `generate` method of the model. - min_length (`int`, *optional*, defaults to 0): - Minimum length that will be used by default in the `generate` method of the model. - do_sample (`bool`, *optional*, defaults to `False`): - Flag that will be used by default in the `generate` method of the model. Whether or not to use sampling ; - use greedy decoding otherwise. - early_stopping (`bool`, *optional*, defaults to `False`): - Flag that will be used by default in the `generate` method of the model. Whether to stop the beam search - when at least `num_beams` sentences are finished per batch or not. - num_beams (`int`, *optional*, defaults to 1): - Number of beams for beam search that will be used by default in the `generate` method of the model. 1 means - no beam search. - num_beam_groups (`int`, *optional*, defaults to 1): - Number of groups to divide `num_beams` into in order to ensure diversity among different groups of beams - that will be used by default in the `generate` method of the model. 1 means no group beam search. - diversity_penalty (`float`, *optional*, defaults to 0.0): - Value to control diversity for group beam search. that will be used by default in the `generate` method of - the model. 0 means no diversity penalty. The higher the penalty, the more diverse are the outputs. - temperature (`float`, *optional*, defaults to 1.0): - The value used to module the next token probabilities that will be used by default in the `generate` method - of the model. Must be strictly positive. - top_k (`int`, *optional*, defaults to 50): - Number of highest probability vocabulary tokens to keep for top-k-filtering that will be used by default in - the `generate` method of the model. 
- top_p (`float`, *optional*, defaults to 1): - Value that will be used by default in the `generate` method of the model for `top_p`. If set to float < 1, - only the most probable tokens with probabilities that add up to `top_p` or higher are kept for generation. - typical_p (`float`, *optional*, defaults to 1): - Local typicality measures how similar the conditional probability of predicting a target token next is to - the expected conditional probability of predicting a random token next, given the partial text already - generated. If set to float < 1, the smallest set of the most locally typical tokens with probabilities that - add up to `typical_p` or higher are kept for generation. See [this - paper](https://arxiv.org/pdf/2202.00666.pdf) for more details. - repetition_penalty (`float`, *optional*, defaults to 1): - Parameter for repetition penalty that will be used by default in the `generate` method of the model. 1.0 - means no penalty. - length_penalty (`float`, *optional*, defaults to 1): - Exponential penalty to the length that is used with beam-based generation. It is applied as an exponent to - the sequence length, which in turn is used to divide the score of the sequence. Since the score is the log - likelihood of the sequence (i.e. negative), `length_penalty` > 0.0 promotes longer sequences, while - `length_penalty` < 0.0 encourages shorter sequences. - no_repeat_ngram_size (`int`, *optional*, defaults to 0) -- Value that will be used by default in the - `generate` method of the model for `no_repeat_ngram_size`. If set to int > 0, all ngrams of that size can - only occur once. - encoder_no_repeat_ngram_size (`int`, *optional*, defaults to 0) -- Value that will be used by - default in the `generate` method of the model for `encoder_no_repeat_ngram_size`. If set to int > 0, all - ngrams of that size that occur in the `encoder_input_ids` cannot occur in the `decoder_input_ids`. 
- bad_words_ids (`List[int]`, *optional*): - List of token ids that are not allowed to be generated that will be used by default in the `generate` - method of the model. In order to get the tokens of the words that should not appear in the generated text, - use `tokenizer.encode(bad_word, add_prefix_space=True)`. - num_return_sequences (`int`, *optional*, defaults to 1): - Number of independently computed returned sequences for each element in the batch that will be used by - default in the `generate` method of the model. - output_scores (`bool`, *optional*, defaults to `False`): - Whether the model should return the logits when used for generation. - return_dict_in_generate (`bool`, *optional*, defaults to `False`): - Whether the model should return a [`~transformers.utils.ModelOutput`] instead of a `torch.LongTensor`. - forced_bos_token_id (`int`, *optional*): - The id of the token to force as the first generated token after the `decoder_start_token_id`. Useful for - multilingual models like [mBART](../model_doc/mbart) where the first generated token needs to be the target - language token. - forced_eos_token_id (`int`, *optional*): - The id of the token to force as the last generated token when `max_length` is reached. - remove_invalid_values (`bool`, *optional*): - Whether to remove possible _nan_ and _inf_ outputs of the model to prevent the generation method to crash. - Note that using `remove_invalid_values` can slow down generation. - > Parameters for fine-tuning tasks architectures (`List[str]`, *optional*): @@ -287,7 +225,7 @@ def __init__(self, **kwargs): # Retrocompatibility: Parameters for sequence generation. While we will keep the ability to load these # parameters, saving them will be deprecated. In a distant future, we won't need to load them. 
- for parameter_name, default_value in self._get_generation_defaults().items(): + for parameter_name, default_value in self._get_global_generation_defaults().items(): setattr(self, parameter_name, kwargs.pop(parameter_name, default_value)) # Fine-tuning task arguments @@ -440,16 +378,13 @@ def save_pretrained(self, save_directory: Union[str, os.PathLike], push_to_hub: if os.path.isfile(save_directory): raise AssertionError(f"Provided path ({save_directory}) should be a directory, not a file") - non_default_generation_parameters = {} - for parameter_name, default_value in self._get_generation_defaults().items(): - if hasattr(self, parameter_name) and getattr(self, parameter_name) != default_value: - non_default_generation_parameters[parameter_name] = getattr(self, parameter_name) + non_default_generation_parameters = self._get_non_default_generation_parameters() if len(non_default_generation_parameters) > 0: - logger.warning( - "Some non-default generation parameters are set in the model config. These should go into a " - "GenerationConfig file (https://huggingface.co/docs/transformers/generation_strategies#save-a-custom-decoding-strategy-with-your-model) " - "instead. This warning will be raised to an exception in v4.41.\n" - f"Non-default generation parameters: {str(non_default_generation_parameters)}" + raise ValueError( + "Some non-default generation parameters are set in the model config. 
These should go into either a) " + "`model.generation_config` (as opposed to `model.config`); OR b) a GenerationConfig file " + "(https://huggingface.co/docs/transformers/generation_strategies#save-a-custom-decoding-strategy-with-your-model) " + f"\nNon-default generation parameters: {str(non_default_generation_parameters)}" ) os.makedirs(save_directory, exist_ok=True) @@ -1049,7 +984,7 @@ def register_for_auto_class(cls, auto_class="AutoConfig"): cls._auto_class = auto_class @staticmethod - def _get_generation_defaults() -> Dict[str, Any]: + def _get_global_generation_defaults() -> Dict[str, Any]: return { "max_length": 20, "min_length": 0, @@ -1078,14 +1013,49 @@ def _get_generation_defaults() -> Dict[str, Any]: "begin_suppress_tokens": None, } - def _has_non_default_generation_parameters(self) -> bool: + def _get_non_default_generation_parameters(self) -> Dict[str, Any]: """ - Whether or not this instance holds non-default generation parameters. + Gets the non-default generation parameters on the PretrainedConfig instance """ - for parameter_name, default_value in self._get_generation_defaults().items(): - if hasattr(self, parameter_name) and getattr(self, parameter_name) != default_value: - return True - return False + non_default_generation_parameters = {} + decoder_attribute_name = None + default_config = None + + # Composite models don't have a default config, use their decoder config as a fallback for default values + # If no known pattern is matched, then `default_config = None` -> check against the global generation defaults + try: + default_config = self.__class__() + except ValueError: + for decoder_attribute_name in ("decoder", "generator", "text_config"): + if hasattr(self, decoder_attribute_name): + default_config = getattr(self, decoder_attribute_name).__class__() + break + + # If it is a composite model, we want to check the subconfig that will be used for generation + self_decoder_config = self if decoder_attribute_name is None else getattr(self, 
decoder_attribute_name) + + for parameter_name, default_global_value in self._get_global_generation_defaults().items(): + if hasattr(self_decoder_config, parameter_name): + is_default_in_config = is_default_generation_value = None + parameter_value = getattr(self_decoder_config, parameter_name) + # Three cases in which it is okay for the model config to hold generation config parameters: + # 1. The parameter is set to `None`, effectively delegating its value to the generation config + if parameter_value is None: + continue + # 2. If we have a default config, then the instance should hold the same generation defaults + if default_config is not None: + is_default_in_config = parameter_value == getattr(default_config, parameter_name) + # 3. If we don't have a default config, then the instance should hold the global generation defaults + else: + is_default_generation_value = parameter_value == default_global_value + + is_non_default = (is_default_in_config is False) or ( + is_default_in_config is None and is_default_generation_value is False + ) + if is_non_default: + non_default_generation_parameters[parameter_name] = getattr(self_decoder_config, parameter_name) + + return non_default_generation_parameters def get_configuration_file(configuration_files: List[str]) -> str: diff --git a/src/transformers/generation/__init__.py b/src/transformers/generation/__init__.py index 6880321d6326..faf5266b84ae 100644 --- a/src/transformers/generation/__init__.py +++ b/src/transformers/generation/__init__.py @@ -55,7 +55,6 @@ "ExponentialDecayLengthPenalty", "ForcedBOSTokenLogitsProcessor", "ForcedEOSTokenLogitsProcessor", - "ForceTokensLogitsProcessor", "HammingDiversityLogitsProcessor", "InfNanRemoveLogitsProcessor", "LogitNormalization", @@ -201,7 +200,6 @@ ExponentialDecayLengthPenalty, ForcedBOSTokenLogitsProcessor, ForcedEOSTokenLogitsProcessor, - ForceTokensLogitsProcessor, HammingDiversityLogitsProcessor, InfNanRemoveLogitsProcessor, LogitNormalization, diff --git 
a/src/transformers/generation/logits_process.py b/src/transformers/generation/logits_process.py index 7f89e239245b..e9ba45606829 100644 --- a/src/transformers/generation/logits_process.py +++ b/src/transformers/generation/logits_process.py @@ -15,7 +15,6 @@ import inspect import math -import warnings from typing import Callable, Dict, Iterable, List, Optional, Tuple, Union import numpy as np @@ -1844,34 +1843,6 @@ def __call__(self, input_ids: torch.LongTensor, scores: torch.FloatTensor) -> to return scores -class ForceTokensLogitsProcessor(LogitsProcessor): - r""" - This processor takes a list of pairs of integers which indicates a mapping from generation indices to token - indices that will be forced before generation. The processor will set their log probs to `inf` so that they are - sampled at their corresponding index. Originally created for - [Whisper](https://huggingface.co/docs/transformers/model_doc/whisper). - """ - - def __init__(self, force_token_map: List[List[int]], _has_warned: Optional[bool] = False): - self.force_token_map = dict(force_token_map) - if not _has_warned: - # TODO(Sanchit): remove this processor entirely in v4.40 - warnings.warn( - "This `ForceTokensLogitsProcessor` has been deprecated and will be removed in v4.40. 
Should you need to provide prompt ids for generation, specify `input_ids` to the generate method for decoder-only models, or `decoder_input_ids` for encoder-decoder models.", - FutureWarning, - ) - - @add_start_docstrings(LOGITS_PROCESSOR_INPUTS_DOCSTRING) - def __call__(self, input_ids: torch.LongTensor, scores: torch.FloatTensor) -> torch.FloatTensor: - generation_idx = input_ids.shape[-1] - current_token = self.force_token_map.get(generation_idx, None) - scores_processed = scores - if current_token is not None: - scores_processed = torch.full_like(scores, -float("inf")) - scores_processed[:, current_token] = 0 - return scores_processed - - class WhisperTimeStampLogitsProcessor(LogitsProcessor): r""" diff --git a/src/transformers/generation/stopping_criteria.py b/src/transformers/generation/stopping_criteria.py index f8e94f6f86a0..961b6d6f5e43 100644 --- a/src/transformers/generation/stopping_criteria.py +++ b/src/transformers/generation/stopping_criteria.py @@ -85,36 +85,6 @@ def __call__(self, input_ids: torch.LongTensor, scores: torch.FloatTensor, **kwa return torch.full((input_ids.shape[0],), is_done, device=input_ids.device, dtype=torch.bool) -class MaxNewTokensCriteria(StoppingCriteria): - """ - This class can be used to stop generation whenever the generated number of tokens exceeds `max_new_tokens`. Keep in - mind for decoder-only type of transformers, this will **not** include the initial prompted tokens. This is very - close to `MaxLengthCriteria` but ignores the number of initial tokens. - - Args: - start_length (`int`): - The number of initial tokens. - max_new_tokens (`int`): - The maximum number of tokens to generate. - """ - - def __init__(self, start_length: int, max_new_tokens: int): - warnings.warn( - "The class `MaxNewTokensCriteria` is deprecated and will be removed in v4.43. 
" - f"Please use `MaxLengthCriteria(max_length={start_length + max_new_tokens})` " - "with `max_length = start_length + max_new_tokens` instead.", - FutureWarning, - ) - self.start_length = start_length - self.max_new_tokens = max_new_tokens - self.max_length = start_length + max_new_tokens - - @add_start_docstrings(STOPPING_CRITERIA_INPUTS_DOCSTRING) - def __call__(self, input_ids: torch.LongTensor, scores: torch.FloatTensor, **kwargs) -> torch.BoolTensor: - is_done = input_ids.shape[-1] >= self.max_length - return torch.full((input_ids.shape[0],), is_done, device=input_ids.device, dtype=torch.bool) - - class MaxTimeCriteria(StoppingCriteria): """ This class can be used to stop generation whenever the full generation exceeds some amount of time. By default, the @@ -516,8 +486,6 @@ def max_length(self) -> Optional[int]: for stopping_criterium in self: if isinstance(stopping_criterium, MaxLengthCriteria): return stopping_criterium.max_length - elif isinstance(stopping_criterium, MaxNewTokensCriteria): - return stopping_criterium.max_length return None diff --git a/src/transformers/generation/tf_logits_process.py b/src/transformers/generation/tf_logits_process.py index fc9799b7ab39..58824b7b0071 100644 --- a/src/transformers/generation/tf_logits_process.py +++ b/src/transformers/generation/tf_logits_process.py @@ -520,15 +520,21 @@ def __init__(self, begin_suppress_tokens, begin_index): self.begin_index = begin_index def __call__(self, input_ids: tf.Tensor, scores: tf.Tensor, cur_len: int) -> tf.Tensor: - scores = tf.cond( - tf.equal(cur_len, self.begin_index), - lambda: tf.tensor_scatter_nd_update( - scores, - indices=[[i, token] for i in range(scores.shape[0]) for token in self.begin_suppress_tokens], - updates=[-float("inf") for _ in range(scores.shape[0] * len(self.begin_suppress_tokens))], - ), - lambda: scores, - ) + suppressed_indices = [] + for token in self.begin_suppress_tokens: + if token < scores.shape[-1]: # to ensure we don't go beyond the vocab size + 
suppressed_indices.extend([[i, token] for i in range(scores.shape[0])]) + + if len(suppressed_indices) > 0: + scores = tf.cond( + tf.equal(cur_len, self.begin_index), + lambda: tf.tensor_scatter_nd_update( + scores, + indices=suppressed_indices, + updates=[-float("inf") for _ in range(len(suppressed_indices))], # one update per kept index + ), + lambda: scores, + ) return scores @@ -540,11 +546,17 @@ def __init__(self, suppress_tokens): self.suppress_tokens = list(suppress_tokens) def __call__(self, input_ids: tf.Tensor, scores: tf.Tensor, cur_len: int) -> tf.Tensor: - scores = tf.tensor_scatter_nd_update( - scores, - indices=[[i, token] for i in range(scores.shape[0]) for token in self.suppress_tokens], - updates=[-float("inf") for _ in range(scores.shape[0] * len(self.suppress_tokens))], - ) + suppressed_indices = [] + for token in self.suppress_tokens: + if token < scores.shape[-1]: # to ensure we don't go beyond the vocab size + suppressed_indices.extend([[i, token] for i in range(scores.shape[0])]) + + if len(suppressed_indices) > 0: + scores = tf.tensor_scatter_nd_update( + scores, + indices=suppressed_indices, + updates=[-float("inf") for _ in range(len(suppressed_indices))], # one update per kept index + ) return scores diff --git a/src/transformers/generation/utils.py b/src/transformers/generation/utils.py index a9ebdcdd4775..07251a5c4e50 100644 --- a/src/transformers/generation/utils.py +++ b/src/transformers/generation/utils.py @@ -76,7 +76,6 @@ ExponentialDecayLengthPenalty, ForcedBOSTokenLogitsProcessor, ForcedEOSTokenLogitsProcessor, - ForceTokensLogitsProcessor, HammingDiversityLogitsProcessor, InfNanRemoveLogitsProcessor, LogitNormalization, @@ -865,9 +864,6 @@ def _get_logits_processor( if (input_ids_seq_length > 1 or generation_config.forced_bos_token_id is None) else begin_index + 1 ) - if generation_config.forced_decoder_ids is not None: - # generation starts after the last token that is forced -
begin_index += generation_config.forced_decoder_ids[-1][0] processors.append( SuppressTokensAtBeginLogitsProcessor( generation_config.begin_suppress_tokens, @@ -876,12 +872,11 @@ def _get_logits_processor( ) ) if generation_config.forced_decoder_ids is not None: - # TODO(Sanchit): deprecate in v4.40 by removing this logic - warnings.warn( - "You have explicitly specified `forced_decoder_ids`. This functionality has been deprecated and will throw an error in v4.40. Please remove the `forced_decoder_ids` argument in favour of `input_ids` or `decoder_input_ids` respectively.", - FutureWarning, + # TODO (sanchit): move this exception to GenerationConfig.validate() when TF & FLAX are aligned with PT + raise ValueError( + "You have explicitly specified `forced_decoder_ids`. Please remove the `forced_decoder_ids` argument " + "in favour of `input_ids` or `decoder_input_ids` respectively.", ) - processors.append(ForceTokensLogitsProcessor(generation_config.forced_decoder_ids, _has_warned=True)) if generation_config.watermarking_config is not None: processors.append( WatermarkLogitsProcessor( @@ -1344,19 +1339,18 @@ def _prepare_generation_config( using_model_generation_config = False if generation_config is None: # legacy: users may modify the model configuration to control generation. To trigger this legacy behavior, - # three conditions must be met + # the following conditions must be met # 1) the generation config must have been created from the model config (`_from_model_config` field); # 2) the generation config must have seen no modification since its creation (the hash is the same); # 3) the user must have set generation parameters in the model config. # NOTE: `torch.compile` can't compile `hash`, this legacy support is disabled with compilation. 
if ( not is_torchdynamo_compiling() - and self.generation_config._from_model_config - and self.generation_config._original_object_hash == hash(self.generation_config) - and self.config._has_non_default_generation_parameters() + and self.generation_config._from_model_config # 1) + and self.generation_config._original_object_hash == hash(self.generation_config) # 2) ): new_generation_config = GenerationConfig.from_model_config(self.config) - if new_generation_config != self.generation_config: + if new_generation_config != self.generation_config: # 3) warnings.warn( "You have modified the pretrained model configuration to control generation. This is a" " deprecated strategy to control generation and will be removed soon, in a future version." @@ -2273,13 +2267,6 @@ def heal_tokens( return input_ids - def contrastive_search(self, *args, **kwargs): - logger.warning_once( - "Calling `contrastive_search` directly is deprecated and will be removed in v4.41. Use `generate` or a " - "custom generation loop instead.", - ) - return self._contrastive_search(*args, **kwargs) - def _dola_decoding( self, input_ids: torch.LongTensor, diff --git a/src/transformers/modeling_utils.py b/src/transformers/modeling_utils.py index b92d4b447f19..b943b5e7989f 100755 --- a/src/transformers/modeling_utils.py +++ b/src/transformers/modeling_utils.py @@ -2571,26 +2571,21 @@ def save_pretrained( # Save the config if is_main_process: if not _hf_peft_config_loaded: + # If the model config has set attributes that should be in the generation config, move them there. + misplaced_generation_parameters = model_to_save.config._get_non_default_generation_parameters() + if self.can_generate() and len(misplaced_generation_parameters) > 0: + warnings.warn( + "Moving the following attributes in the config to the generation config: " + f"{misplaced_generation_parameters}. 
You are seeing this warning because you've set " + "generation parameters in the model config, as opposed to in the generation config.", + UserWarning, + ) + for param_name, param_value in misplaced_generation_parameters.items(): + setattr(model_to_save.generation_config, param_name, param_value) + setattr(model_to_save.config, param_name, None) + model_to_save.config.save_pretrained(save_directory) if self.can_generate(): - # generation config built from the model config + the model config holds generation kwargs -> generate - # may revert to legacy behavior if the two don't match - if ( - model_to_save.generation_config._from_model_config - and model_to_save.config._has_non_default_generation_parameters() - ): - new_generation_config = GenerationConfig.from_model_config(model_to_save.config) - if new_generation_config != model_to_save.generation_config: - logger.warning( - "Your generation config was originally created from the model config, but the model " - "config has changed since then. Unless you pass the `generation_config` argument to this " - "model's `generate` calls, they will revert to the legacy behavior where the base " - "`generate` parameterization is loaded from the model config instead. " - "To avoid this behavior and this warning, we recommend you to overwrite the generation " - "config model attribute before calling the model's `save_pretrained`, preferably also " - "removing any generation kwargs from the model config. This warning will be raised to an " - "exception in v4.41." 
- ) model_to_save.generation_config.save_pretrained(save_directory) if _hf_peft_config_loaded: diff --git a/src/transformers/models/audio_spectrogram_transformer/configuration_audio_spectrogram_transformer.py b/src/transformers/models/audio_spectrogram_transformer/configuration_audio_spectrogram_transformer.py index 9e1d995dc291..7980667a68d7 100644 --- a/src/transformers/models/audio_spectrogram_transformer/configuration_audio_spectrogram_transformer.py +++ b/src/transformers/models/audio_spectrogram_transformer/configuration_audio_spectrogram_transformer.py @@ -14,6 +14,8 @@ # limitations under the License. """Audio Spectogram Transformer (AST) model configuration""" +from typing import Any, Dict + from ...configuration_utils import PretrainedConfig from ...utils import logging @@ -118,3 +120,9 @@ def __init__( self.time_stride = time_stride self.max_length = max_length self.num_mel_bins = num_mel_bins + + # Overwritten from the parent class: AST is not compatible with `generate`, but has a config parameter sharing the + # same name (`max_length`). Sharing the same name triggers checks regarding the config -> generation_config + # generative parameters deprecation cycle, overwriting this function prevents this from happening. + def _get_non_default_generation_parameters(self) -> Dict[str, Any]: + return {} diff --git a/src/transformers/models/bloom/modeling_bloom.py b/src/transformers/models/bloom/modeling_bloom.py index c1caae6c6857..e365744f8b9e 100644 --- a/src/transformers/models/bloom/modeling_bloom.py +++ b/src/transformers/models/bloom/modeling_bloom.py @@ -693,7 +693,7 @@ def forward( past_key_values = DynamicCache.from_legacy_cache(past_key_values) logger.warning_once( "Using `past_key_values` as a tuple is deprecated and will be removed in v4.45. 
" - "Please use an appropriate `Cache` class (https://huggingface.co/docs/transformers/v4.41.3/en/internal/generation_utils#transformers.Cache)" + "Please use an appropriate `Cache` class (https://huggingface.co/docs/transformers/internal/generation_utils#transformers.Cache)" ) batch_size, seq_length, _ = inputs_embeds.shape diff --git a/src/transformers/models/codegen/modeling_codegen.py b/src/transformers/models/codegen/modeling_codegen.py index 1920f350f559..e668a0dc0631 100644 --- a/src/transformers/models/codegen/modeling_codegen.py +++ b/src/transformers/models/codegen/modeling_codegen.py @@ -532,7 +532,7 @@ def forward( if not self.training: logger.warning_once( "We detected that you are passing `past_key_values` as a tuple and this is deprecated and will be removed in v4.45. " - "Please use an appropriate `Cache` class (https://huggingface.co/docs/transformers/v4.41.3/en/internal/generation_utils#transformers.Cache)" + "Please use an appropriate `Cache` class (https://huggingface.co/docs/transformers/internal/generation_utils#transformers.Cache)" ) seq_length = inputs_embeds.shape[1] diff --git a/src/transformers/models/dbrx/modeling_dbrx.py b/src/transformers/models/dbrx/modeling_dbrx.py index 3486d5ed3ab0..1d8f011c3f6a 100644 --- a/src/transformers/models/dbrx/modeling_dbrx.py +++ b/src/transformers/models/dbrx/modeling_dbrx.py @@ -1066,7 +1066,7 @@ def forward( past_key_values = DynamicCache.from_legacy_cache(past_key_values) logger.warning_once( "We detected that you are passing `past_key_values` as a tuple and this is deprecated and will be removed in v4.43. 
" - "Please use an appropriate `Cache` class (https://huggingface.co/docs/transformers/v4.41.3/en/internal/generation_utils#transformers.Cache)" + "Please use an appropriate `Cache` class (https://huggingface.co/docs/transformers/internal/generation_utils#transformers.Cache)" ) if cache_position is None: diff --git a/src/transformers/models/encoder_decoder/configuration_encoder_decoder.py b/src/transformers/models/encoder_decoder/configuration_encoder_decoder.py index 8c0ae2771e81..ab5d49b32fea 100644 --- a/src/transformers/models/encoder_decoder/configuration_encoder_decoder.py +++ b/src/transformers/models/encoder_decoder/configuration_encoder_decoder.py @@ -74,9 +74,11 @@ class EncoderDecoderConfig(PretrainedConfig): def __init__(self, **kwargs): super().__init__(**kwargs) - assert ( - "encoder" in kwargs and "decoder" in kwargs - ), "Config has to be initialized with encoder and decoder config" + if "encoder" not in kwargs or "decoder" not in kwargs: + raise ValueError( + f"A configuration of type {self.model_type} cannot be instantiated because " + f"both `encoder` and `decoder` sub-configurations were not passed, only {kwargs}" + ) encoder_config = kwargs.pop("encoder") encoder_model_type = encoder_config.pop("model_type") decoder_config = kwargs.pop("decoder") diff --git a/src/transformers/models/falcon/modeling_falcon.py b/src/transformers/models/falcon/modeling_falcon.py index edaef78f9286..a9acd171c3ae 100644 --- a/src/transformers/models/falcon/modeling_falcon.py +++ b/src/transformers/models/falcon/modeling_falcon.py @@ -1029,7 +1029,7 @@ def forward( if not self.training: logger.warning_once( "We detected that you are passing `past_key_values` as a tuple and this is deprecated and will be removed in v4.45.
" - "Please use an appropriate `Cache` class (https://huggingface.co/docs/transformers/v4.41.3/en/internal/generation_utils#transformers.Cache)" + "Please use an appropriate `Cache` class (https://huggingface.co/docs/transformers/internal/generation_utils#transformers.Cache)" ) alibi = None diff --git a/src/transformers/models/gemma/modeling_gemma.py b/src/transformers/models/gemma/modeling_gemma.py index a05d2c059e21..c648dee8fb56 100644 --- a/src/transformers/models/gemma/modeling_gemma.py +++ b/src/transformers/models/gemma/modeling_gemma.py @@ -861,7 +861,7 @@ def forward( past_key_values = DynamicCache.from_legacy_cache(past_key_values) logger.warning_once( "We detected that you are passing `past_key_values` as a tuple and this is deprecated and will be removed in v4.43. " - "Please use an appropriate `Cache` class (https://huggingface.co/docs/transformers/v4.41.3/en/internal/generation_utils#transformers.Cache)" + "Please use an appropriate `Cache` class (https://huggingface.co/docs/transformers/internal/generation_utils#transformers.Cache)" ) # decoder layers diff --git a/src/transformers/models/git/modeling_git.py b/src/transformers/models/git/modeling_git.py index 581f2b3947b4..4807289c927c 100644 --- a/src/transformers/models/git/modeling_git.py +++ b/src/transformers/models/git/modeling_git.py @@ -423,7 +423,7 @@ def forward( past_key_values = DynamicCache.from_legacy_cache(past_key_values) logger.warning_once( "We detected that you are passing `past_key_values` as a tuple and this is deprecated and will be removed in v4.45. 
" - "Please use an appropriate `Cache` class (https://huggingface.co/docs/transformers/v4.41.3/en/internal/generation_utils#transformers.Cache)" + "Please use an appropriate `Cache` class (https://huggingface.co/docs/transformers/internal/generation_utils#transformers.Cache)" ) all_hidden_states = () if output_hidden_states else None diff --git a/src/transformers/models/gpt_neo/modeling_gpt_neo.py b/src/transformers/models/gpt_neo/modeling_gpt_neo.py index 3a606c37b31c..65144ad0c0f1 100755 --- a/src/transformers/models/gpt_neo/modeling_gpt_neo.py +++ b/src/transformers/models/gpt_neo/modeling_gpt_neo.py @@ -747,7 +747,7 @@ def forward( if not self.training: logger.warning_once( "We detected that you are passing `past_key_values` as a tuple and this is deprecated and will be removed in v4.45. " - "Please use an appropriate `Cache` class (https://huggingface.co/docs/transformers/v4.41.3/en/internal/generation_utils#transformers.Cache)" + "Please use an appropriate `Cache` class (https://huggingface.co/docs/transformers/internal/generation_utils#transformers.Cache)" ) seq_length = inputs_embeds.shape[1] diff --git a/src/transformers/models/gpt_neox/modeling_gpt_neox.py b/src/transformers/models/gpt_neox/modeling_gpt_neox.py index 22fbb0429f59..5d21f2d2a725 100755 --- a/src/transformers/models/gpt_neox/modeling_gpt_neox.py +++ b/src/transformers/models/gpt_neox/modeling_gpt_neox.py @@ -928,7 +928,7 @@ def forward( if not self.training: logger.warning_once( "We detected that you are passing `past_key_values` as a tuple and this is deprecated and will be removed in v4.45. 
" - "Please use an appropriate `Cache` class (https://huggingface.co/docs/transformers/v4.41.3/en/internal/generation_utils#transformers.Cache)" + "Please use an appropriate `Cache` class (https://huggingface.co/docs/transformers/internal/generation_utils#transformers.Cache)" ) seq_length = inputs_embeds.shape[1] diff --git a/src/transformers/models/gptj/modeling_gptj.py b/src/transformers/models/gptj/modeling_gptj.py index 82540fe98ec7..ba0f319791e4 100644 --- a/src/transformers/models/gptj/modeling_gptj.py +++ b/src/transformers/models/gptj/modeling_gptj.py @@ -819,7 +819,7 @@ def forward( if not self.training: logger.warning_once( "We detected that you are passing `past_key_values` as a tuple and this is deprecated and will be removed in v4.45. " - "Please use an appropriate `Cache` class (https://huggingface.co/docs/transformers/v4.41.3/en/internal/generation_utils#transformers.Cache)" + "Please use an appropriate `Cache` class (https://huggingface.co/docs/transformers/internal/generation_utils#transformers.Cache)" ) seq_length = inputs_embeds.shape[1] diff --git a/src/transformers/models/idefics/modeling_idefics.py b/src/transformers/models/idefics/modeling_idefics.py index 3532219f3d6c..b4c24a46bb68 100644 --- a/src/transformers/models/idefics/modeling_idefics.py +++ b/src/transformers/models/idefics/modeling_idefics.py @@ -1243,7 +1243,7 @@ def forward( if not self.training: logger.warning_once( "We detected that you are passing `past_key_values` as a tuple and this is deprecated and will be removed in v4.45. 
" - "Please use an appropriate `Cache` class (https://huggingface.co/docs/transformers/v4.41.3/en/internal/generation_utils#transformers.Cache)" + "Please use an appropriate `Cache` class (https://huggingface.co/docs/transformers/internal/generation_utils#transformers.Cache)" ) return_legacy_cache = True past_key_values = DynamicCache.from_legacy_cache(past_key_values) diff --git a/src/transformers/models/llama/modeling_llama.py b/src/transformers/models/llama/modeling_llama.py index 8716d27f5481..293ce1dd7f6b 100644 --- a/src/transformers/models/llama/modeling_llama.py +++ b/src/transformers/models/llama/modeling_llama.py @@ -950,7 +950,7 @@ def forward( past_key_values = DynamicCache.from_legacy_cache(past_key_values) logger.warning_once( "We detected that you are passing `past_key_values` as a tuple and this is deprecated and will be removed in v4.43. " - "Please use an appropriate `Cache` class (https://huggingface.co/docs/transformers/v4.41.3/en/internal/generation_utils#transformers.Cache)" + "Please use an appropriate `Cache` class (https://huggingface.co/docs/transformers/internal/generation_utils#transformers.Cache)" ) if cache_position is None: diff --git a/src/transformers/models/mistral/modeling_mistral.py b/src/transformers/models/mistral/modeling_mistral.py index 1a2b732e85e4..7ee8a12e74cb 100644 --- a/src/transformers/models/mistral/modeling_mistral.py +++ b/src/transformers/models/mistral/modeling_mistral.py @@ -766,7 +766,7 @@ def forward( return_legacy_cache = True logger.warning_once( "We detected that you are passing `past_key_values` as a tuple and this is deprecated and will be removed in v4.43. 
" - "Please use an appropriate `Cache` class (https://huggingface.co/docs/transformers/v4.41.3/en/internal/generation_utils#transformers.Cache)" + "Please use an appropriate `Cache` class (https://huggingface.co/docs/transformers/internal/generation_utils#transformers.Cache)" ) if cache_position is None: diff --git a/src/transformers/models/mixtral/modeling_mixtral.py b/src/transformers/models/mixtral/modeling_mixtral.py index 522b6db7bcc7..d9ca3be7b0d4 100644 --- a/src/transformers/models/mixtral/modeling_mixtral.py +++ b/src/transformers/models/mixtral/modeling_mixtral.py @@ -1022,7 +1022,7 @@ def forward( past_key_values = DynamicCache.from_legacy_cache(past_key_values) logger.warning_once( "We detected that you are passing `past_key_values` as a tuple and this is deprecated and will be removed in v4.43. " - "Please use an appropriate `Cache` class (https://huggingface.co/docs/transformers/v4.41.3/en/internal/generation_utils#transformers.Cache)" + "Please use an appropriate `Cache` class (https://huggingface.co/docs/transformers/internal/generation_utils#transformers.Cache)" ) if inputs_embeds is None: diff --git a/src/transformers/models/olmo/modeling_olmo.py b/src/transformers/models/olmo/modeling_olmo.py index 1940660f61b5..c185112f318c 100644 --- a/src/transformers/models/olmo/modeling_olmo.py +++ b/src/transformers/models/olmo/modeling_olmo.py @@ -872,7 +872,7 @@ def forward( past_key_values = DynamicCache.from_legacy_cache(past_key_values) logger.warning_once( "We detected that you are passing `past_key_values` as a tuple and this is deprecated and will be removed in v4.43. 
" - "Please use an appropriate `Cache` class (https://huggingface.co/docs/transformers/v4.41.3/en/internal/generation_utils#transformers.Cache)" + "Please use an appropriate `Cache` class (https://huggingface.co/docs/transformers/internal/generation_utils#transformers.Cache)" ) if cache_position is None: diff --git a/src/transformers/models/persimmon/modeling_persimmon.py b/src/transformers/models/persimmon/modeling_persimmon.py index 1e4f56c0674d..e6f3f491d309 100644 --- a/src/transformers/models/persimmon/modeling_persimmon.py +++ b/src/transformers/models/persimmon/modeling_persimmon.py @@ -682,7 +682,7 @@ def forward( past_key_values = DynamicCache.from_legacy_cache(past_key_values) logger.warning_once( "We detected that you are passing `past_key_values` as a tuple and this is deprecated and will be removed in v4.43. " - "Please use an appropriate `Cache` class (https://huggingface.co/docs/transformers/v4.41.3/en/internal/generation_utils#transformers.Cache)" + "Please use an appropriate `Cache` class (https://huggingface.co/docs/transformers/internal/generation_utils#transformers.Cache)" ) if inputs_embeds is None: diff --git a/src/transformers/models/phi/modeling_phi.py b/src/transformers/models/phi/modeling_phi.py index 6d63c0ea7e8e..f53ae35ca4ce 100644 --- a/src/transformers/models/phi/modeling_phi.py +++ b/src/transformers/models/phi/modeling_phi.py @@ -966,7 +966,7 @@ def forward( past_key_values = DynamicCache.from_legacy_cache(past_key_values) logger.warning_once( "We detected that you are passing `past_key_values` as a tuple and this is deprecated and will be removed in v4.43. 
" - "Please use an appropriate `Cache` class (https://huggingface.co/docs/transformers/v4.41.3/en/internal/generation_utils#transformers.Cache)" + "Please use an appropriate `Cache` class (https://huggingface.co/docs/transformers/internal/generation_utils#transformers.Cache)" ) if inputs_embeds is None: diff --git a/src/transformers/models/phi3/modeling_phi3.py b/src/transformers/models/phi3/modeling_phi3.py index 08417fcabfaa..d6788c5cc350 100644 --- a/src/transformers/models/phi3/modeling_phi3.py +++ b/src/transformers/models/phi3/modeling_phi3.py @@ -1007,7 +1007,7 @@ def forward( past_key_values = DynamicCache.from_legacy_cache(past_key_values) logger.warning_once( "We detected that you are passing `past_key_values` as a tuple and this is deprecated and will be removed in v4.43. " - "Please use an appropriate `Cache` class (https://huggingface.co/docs/transformers/v4.41.3/en/internal/generation_utils#transformers.Cache)" + "Please use an appropriate `Cache` class (https://huggingface.co/docs/transformers/internal/generation_utils#transformers.Cache)" ) if inputs_embeds is None: diff --git a/src/transformers/models/qwen2/modeling_qwen2.py b/src/transformers/models/qwen2/modeling_qwen2.py index 28b414b1901b..59fe54819df1 100644 --- a/src/transformers/models/qwen2/modeling_qwen2.py +++ b/src/transformers/models/qwen2/modeling_qwen2.py @@ -871,7 +871,7 @@ def forward( past_key_values = DynamicCache.from_legacy_cache(past_key_values) logger.warning_once( "We detected that you are passing `past_key_values` as a tuple and this is deprecated and will be removed in v4.43. 
" - "Please use an appropriate `Cache` class (https://huggingface.co/docs/transformers/v4.41.3/en/internal/generation_utils#transformers.Cache)" + "Please use an appropriate `Cache` class (https://huggingface.co/docs/transformers/internal/generation_utils#transformers.Cache)" ) if inputs_embeds is None: diff --git a/src/transformers/models/qwen2_moe/modeling_qwen2_moe.py b/src/transformers/models/qwen2_moe/modeling_qwen2_moe.py index 12ebe26e058d..f1f8ca3ff532 100644 --- a/src/transformers/models/qwen2_moe/modeling_qwen2_moe.py +++ b/src/transformers/models/qwen2_moe/modeling_qwen2_moe.py @@ -1033,7 +1033,7 @@ def forward( past_key_values = DynamicCache.from_legacy_cache(past_key_values) logger.warning_once( "We detected that you are passing `past_key_values` as a tuple and this is deprecated and will be removed in v4.43. " - "Please use an appropriate `Cache` class (https://huggingface.co/docs/transformers/v4.41.3/en/internal/generation_utils#transformers.Cache)" + "Please use an appropriate `Cache` class (https://huggingface.co/docs/transformers/internal/generation_utils#transformers.Cache)" ) if inputs_embeds is None: diff --git a/src/transformers/models/rag/configuration_rag.py b/src/transformers/models/rag/configuration_rag.py index 5dd4d12c5e74..98de7382a456 100644 --- a/src/transformers/models/rag/configuration_rag.py +++ b/src/transformers/models/rag/configuration_rag.py @@ -124,9 +124,11 @@ def __init__( vocab_size=vocab_size, **kwargs, ) - assert ( - "question_encoder" in kwargs and "generator" in kwargs - ), "Config has to be initialized with question_encoder and generator config" + if "question_encoder" not in kwargs or "generator" not in kwargs: + raise ValueError( + f"A configuration of type {self.model_type} cannot be instantiated because " + f"both `question_encoder` and `generator` sub-configurations were not passed, only {kwargs}" + ) question_encoder_config = kwargs.pop("question_encoder") question_encoder_model_type =
question_encoder_config.pop("model_type") decoder_config = kwargs.pop("generator") diff --git a/src/transformers/models/stablelm/modeling_stablelm.py b/src/transformers/models/stablelm/modeling_stablelm.py index 988948a9a827..ae317af37875 100755 --- a/src/transformers/models/stablelm/modeling_stablelm.py +++ b/src/transformers/models/stablelm/modeling_stablelm.py @@ -959,7 +959,7 @@ def forward( past_key_values = DynamicCache.from_legacy_cache(past_key_values) logger.warning_once( "We detected that you are passing `past_key_values` as a tuple and this is deprecated and will be removed in v4.43. " - "Please use an appropriate `Cache` class (https://huggingface.co/docs/transformers/v4.41.3/en/internal/generation_utils#transformers.Cache)" + "Please use an appropriate `Cache` class (https://huggingface.co/docs/transformers/internal/generation_utils#transformers.Cache)" ) if inputs_embeds is None: diff --git a/src/transformers/models/starcoder2/modeling_starcoder2.py b/src/transformers/models/starcoder2/modeling_starcoder2.py index d51077b04254..21469e9d2223 100644 --- a/src/transformers/models/starcoder2/modeling_starcoder2.py +++ b/src/transformers/models/starcoder2/modeling_starcoder2.py @@ -844,7 +844,7 @@ def forward( past_key_values = DynamicCache.from_legacy_cache(past_key_values) logger.warning_once( "We detected that you are passing `past_key_values` as a tuple and this is deprecated and will be removed in v4.43. 
" - "Please use an appropriate `Cache` class (https://huggingface.co/docs/transformers/v4.41.3/en/internal/generation_utils#transformers.Cache)" + "Please use an appropriate `Cache` class (https://huggingface.co/docs/transformers/internal/generation_utils#transformers.Cache)" ) if inputs_embeds is None: diff --git a/src/transformers/utils/dummy_pt_objects.py b/src/transformers/utils/dummy_pt_objects.py index 4732ecea8611..18df9ad6193f 100644 --- a/src/transformers/utils/dummy_pt_objects.py +++ b/src/transformers/utils/dummy_pt_objects.py @@ -289,13 +289,6 @@ def __init__(self, *args, **kwargs): requires_backends(self, ["torch"]) -class ForceTokensLogitsProcessor(metaclass=DummyObject): - _backends = ["torch"] - - def __init__(self, *args, **kwargs): - requires_backends(self, ["torch"]) - - class GenerationMixin(metaclass=DummyObject): _backends = ["torch"] diff --git a/tests/generation/test_stopping_criteria.py b/tests/generation/test_stopping_criteria.py index ddf9a1c9379e..a04dac96169e 100644 --- a/tests/generation/test_stopping_criteria.py +++ b/tests/generation/test_stopping_criteria.py @@ -28,7 +28,6 @@ from transformers.generation import ( EosTokenCriteria, MaxLengthCriteria, - MaxNewTokensCriteria, MaxTimeCriteria, StoppingCriteriaList, StopStringCriteria, @@ -76,21 +75,6 @@ def test_max_length_criteria(self): input_ids, scores = self._get_tensors(10) self.assertTrue(all(criteria(input_ids, scores))) - def test_max_new_tokens_criteria(self): - criteria = MaxNewTokensCriteria(start_length=5, max_new_tokens=5) - - input_ids, scores = self._get_tensors(5) - self.assertFalse(all(criteria(input_ids, scores))) - - input_ids, scores = self._get_tensors(9) - self.assertFalse(all(criteria(input_ids, scores))) - - input_ids, scores = self._get_tensors(10) - self.assertTrue(all(criteria(input_ids, scores))) - - criteria_list = StoppingCriteriaList([criteria]) - self.assertEqual(criteria_list.max_length, 10) - def test_max_time_criteria(self): input_ids, scores = 
self._get_tensors(5) diff --git a/tests/generation/test_utils.py b/tests/generation/test_utils.py index ae52f6c67404..db2bb7989dea 100644 --- a/tests/generation/test_utils.py +++ b/tests/generation/test_utils.py @@ -1576,6 +1576,11 @@ def test_generate_continue_from_past_key_values(self): # 3. ignore `token_type_ids` for simplicity # 4. ignore `forced_eos_token_id`, which requires further manipulation of the continuation inputs and is # active by default on some models + # 5. ignore `encoder_no_repeat_ngram_size`, which is set by default in some encoder-decoder models. When + # we use their decoder as a stand-alone model, `encoder_no_repeat_ngram_size` actually prevents + # repetition exclusively from the prompt. This test relies on comparing one call vs 2 calls + # with cache, what is considered a prompt is different in the two cases. + if "token_type_ids" in inputs: del inputs["token_type_ids"] @@ -1583,6 +1588,7 @@ def test_generate_continue_from_past_key_values(self): model.eval() model.generation_config.pad_token_id = model.generation_config.eos_token_id = -1 model.generation_config.forced_eos_token_id = None + model.generation_config.encoder_no_repeat_ngram_size = 0 model.generation_config.use_cache = True # If "past_key_values" is not returned, skip the test (e.g. 
RWKV uses a different cache name and format) @@ -2790,7 +2796,7 @@ def forward(self, input_ids, **kwargs): def test_default_max_length_warning(self): model = AutoModelForCausalLM.from_pretrained("hf-internal-testing/tiny-random-gpt2").to(torch_device) tokenizer = AutoTokenizer.from_pretrained("hf-internal-testing/tiny-random-gpt2") - model.config.pad_token_id = tokenizer.eos_token_id + model.generation_config.pad_token_id = tokenizer.eos_token_id text = "Hello world" tokenized_inputs = tokenizer([text], return_tensors="pt") @@ -2817,8 +2823,8 @@ def test_length_warning_assisted_generation(self): model = AutoModelForCausalLM.from_pretrained("hf-internal-testing/tiny-random-gpt2").to(torch_device) assistant = AutoModelForCausalLM.from_pretrained("hf-internal-testing/tiny-random-gpt2").to(torch_device) tokenizer = AutoTokenizer.from_pretrained("hf-internal-testing/tiny-random-gpt2") - model.config.pad_token_id = tokenizer.eos_token_id - assistant.config.pad_token_id = tokenizer.eos_token_id + model.generation_config.pad_token_id = tokenizer.eos_token_id + assistant.generation_config.pad_token_id = tokenizer.eos_token_id text = "Hello world" tokenized_inputs = tokenizer([text], return_tensors="pt") @@ -2839,8 +2845,8 @@ def test_generated_length_assisted_generation(self): model = AutoModelForCausalLM.from_pretrained("hf-internal-testing/tiny-random-gpt2").to(torch_device) assistant = AutoModelForCausalLM.from_pretrained("hf-internal-testing/tiny-random-gpt2").to(torch_device) tokenizer = AutoTokenizer.from_pretrained("hf-internal-testing/tiny-random-gpt2") - model.config.pad_token_id = tokenizer.eos_token_id - assistant.config.pad_token_id = tokenizer.eos_token_id + model.generation_config.pad_token_id = tokenizer.eos_token_id + assistant.generation_config.pad_token_id = tokenizer.eos_token_id text = "Hello world" tokenized_inputs = tokenizer([text], return_tensors="pt") @@ -2866,7 +2872,7 @@ def test_model_kwarg_assisted_decoding_decoder_only(self): # PT-only test: TF 
doesn't support assisted decoding yet. model = AutoModelForCausalLM.from_pretrained("hf-internal-testing/tiny-random-gpt2").to(torch_device) tokenizer = AutoTokenizer.from_pretrained("hf-internal-testing/tiny-random-gpt2") - model.config.pad_token_id = tokenizer.eos_token_id + model.generation_config.pad_token_id = tokenizer.eos_token_id text = "Hello world" tokenized_inputs = tokenizer([text], return_tensors="pt") diff --git a/tests/models/bart/test_modeling_bart.py b/tests/models/bart/test_modeling_bart.py index dd0cb5bf4c0b..eda51d21199f 100644 --- a/tests/models/bart/test_modeling_bart.py +++ b/tests/models/bart/test_modeling_bart.py @@ -123,12 +123,6 @@ def __init__( self.pad_token_id = pad_token_id self.bos_token_id = bos_token_id - # forcing a certain token to be generated, sets all other tokens to -inf - # if however the token to be generated is already at -inf then it can lead token - # `nan` values and thus break generation - self.forced_bos_token_id = None - self.forced_eos_token_id = None - def prepare_config_and_inputs(self): input_ids = ids_tensor([self.batch_size, self.seq_length], self.vocab_size) input_ids = ids_tensor([self.batch_size, self.seq_length], self.vocab_size).clamp( @@ -158,8 +152,6 @@ def get_config(self): eos_token_id=self.eos_token_id, bos_token_id=self.bos_token_id, pad_token_id=self.pad_token_id, - forced_bos_token_id=self.forced_bos_token_id, - forced_eos_token_id=self.forced_eos_token_id, ) def get_pipeline_config(self): diff --git a/tests/models/blenderbot/test_modeling_blenderbot.py b/tests/models/blenderbot/test_modeling_blenderbot.py index fa0797cbeed8..cecedb8a9071 100644 --- a/tests/models/blenderbot/test_modeling_blenderbot.py +++ b/tests/models/blenderbot/test_modeling_blenderbot.py @@ -116,12 +116,6 @@ def __init__( self.pad_token_id = pad_token_id self.bos_token_id = bos_token_id - # forcing a certain token to be generated, sets all other tokens to -inf - # if however the token to be generated is already at -inf then it 
can lead token - # `nan` values and thus break generation - self.forced_bos_token_id = None - self.forced_eos_token_id = None - def prepare_config_and_inputs(self): input_ids = ids_tensor([self.batch_size, self.seq_length], self.vocab_size).clamp( 3, @@ -150,8 +144,6 @@ def get_config(self): eos_token_id=self.eos_token_id, bos_token_id=self.bos_token_id, pad_token_id=self.pad_token_id, - forced_bos_token_id=self.forced_bos_token_id, - forced_eos_token_id=self.forced_eos_token_id, ) def get_pipeline_config(self): @@ -368,7 +360,6 @@ def __init__( decoder_attention_heads=4, max_position_embeddings=30, is_encoder_decoder=False, - encoder_no_repeat_ngram_size=0, pad_token_id=0, bos_token_id=1, eos_token_id=2, @@ -399,7 +390,6 @@ def __init__( self.use_cache = use_cache self.max_position_embeddings = max_position_embeddings self.is_encoder_decoder = is_encoder_decoder - self.encoder_no_repeat_ngram_size = encoder_no_repeat_ngram_size self.scope = None self.decoder_key_length = decoder_seq_length @@ -431,7 +421,6 @@ def prepare_config_and_inputs(self): decoder_start_token_id=self.decoder_start_token_id, max_position_embeddings=self.max_position_embeddings, is_encoder_decoder=self.is_encoder_decoder, - encoder_no_repeat_ngram_size=self.encoder_no_repeat_ngram_size, ) return ( diff --git a/tests/models/blenderbot_small/test_modeling_blenderbot_small.py b/tests/models/blenderbot_small/test_modeling_blenderbot_small.py index 6be86a66b98e..59f68b547547 100644 --- a/tests/models/blenderbot_small/test_modeling_blenderbot_small.py +++ b/tests/models/blenderbot_small/test_modeling_blenderbot_small.py @@ -113,12 +113,6 @@ def __init__( self.pad_token_id = pad_token_id self.bos_token_id = bos_token_id - # forcing a certain token to be generated, sets all other tokens to -inf - # if however the token to be generated is already at -inf then it can lead token - # `nan` values and thus break generation - self.forced_bos_token_id = None - self.forced_eos_token_id = None - def 
prepare_config_and_inputs(self): input_ids = ids_tensor([self.batch_size, self.seq_length], self.vocab_size).clamp( 3, @@ -147,8 +141,6 @@ def get_config(self): eos_token_id=self.eos_token_id, bos_token_id=self.bos_token_id, pad_token_id=self.pad_token_id, - forced_bos_token_id=self.forced_bos_token_id, - forced_eos_token_id=self.forced_eos_token_id, ) def prepare_config_and_inputs_for_common(self): diff --git a/tests/models/decision_transformer/test_modeling_decision_transformer.py b/tests/models/decision_transformer/test_modeling_decision_transformer.py index 27d1598167e6..0c95e6291c50 100644 --- a/tests/models/decision_transformer/test_modeling_decision_transformer.py +++ b/tests/models/decision_transformer/test_modeling_decision_transformer.py @@ -41,7 +41,6 @@ def __init__( act_dim=6, state_dim=17, hidden_size=23, - max_length=11, is_training=True, ): self.parent = parent @@ -50,7 +49,6 @@ def __init__( self.act_dim = act_dim self.state_dim = state_dim self.hidden_size = hidden_size - self.max_length = max_length self.is_training = is_training def prepare_config_and_inputs(self): @@ -80,7 +78,6 @@ def get_config(self): act_dim=self.act_dim, state_dim=self.state_dim, hidden_size=self.hidden_size, - max_length=self.max_length, ) def create_and_check_model( diff --git a/tests/models/marian/test_modeling_marian.py b/tests/models/marian/test_modeling_marian.py index 4f4fa36622d1..aed5381fcc70 100644 --- a/tests/models/marian/test_modeling_marian.py +++ b/tests/models/marian/test_modeling_marian.py @@ -132,12 +132,6 @@ def __init__( self.bos_token_id = bos_token_id self.decoder_start_token_id = decoder_start_token_id - # forcing a certain token to be generated, sets all other tokens to -inf - # if however the token to be generated is already at -inf then it can lead token - # `nan` values and thus break generation - self.forced_bos_token_id = None - self.forced_eos_token_id = None - def prepare_config_and_inputs(self): input_ids = ids_tensor([self.batch_size, 
self.seq_length], self.vocab_size).clamp( 3, @@ -167,8 +161,6 @@ def get_config(self): bos_token_id=self.bos_token_id, pad_token_id=self.pad_token_id, decoder_start_token_id=self.decoder_start_token_id, - forced_bos_token_id=self.forced_bos_token_id, - forced_eos_token_id=self.forced_eos_token_id, ) def prepare_config_and_inputs_for_common(self): diff --git a/tests/models/mbart/test_modeling_mbart.py b/tests/models/mbart/test_modeling_mbart.py index 5a8263e11969..9401d892daa3 100644 --- a/tests/models/mbart/test_modeling_mbart.py +++ b/tests/models/mbart/test_modeling_mbart.py @@ -120,12 +120,6 @@ def __init__( self.pad_token_id = pad_token_id self.bos_token_id = bos_token_id - # forcing a certain token to be generated, sets all other tokens to -inf - # if however the token to be generated is already at -inf then it can lead token - # `nan` values and thus break generation - self.forced_bos_token_id = None - self.forced_eos_token_id = None - def prepare_config_and_inputs(self): input_ids = ids_tensor([self.batch_size, self.seq_length], self.vocab_size) input_ids = ids_tensor([self.batch_size, self.seq_length], self.vocab_size).clamp( @@ -155,8 +149,6 @@ def get_config(self): eos_token_id=self.eos_token_id, bos_token_id=self.bos_token_id, pad_token_id=self.pad_token_id, - forced_bos_token_id=self.forced_bos_token_id, - forced_eos_token_id=self.forced_eos_token_id, ) def prepare_config_and_inputs_for_common(self): diff --git a/tests/models/mobilevit/test_modeling_mobilevit.py b/tests/models/mobilevit/test_modeling_mobilevit.py index 9eb5878500d5..cd4cfa68e5dc 100644 --- a/tests/models/mobilevit/test_modeling_mobilevit.py +++ b/tests/models/mobilevit/test_modeling_mobilevit.py @@ -17,7 +17,7 @@ import unittest from transformers import MobileViTConfig -from transformers.testing_utils import require_torch, require_vision, slow, torch_device +from transformers.testing_utils import is_flaky, require_torch, require_vision, slow, torch_device from transformers.utils import 
cached_property, is_torch_available, is_vision_available from ...test_configuration_common import ConfigTester @@ -274,6 +274,10 @@ def test_model_from_pretrained(self): model = MobileViTModel.from_pretrained(model_name) self.assertIsNotNone(model) + @is_flaky(description="is_flaky https://github.com/huggingface/transformers/issues/29516") + def test_batching_equivalence(self): + super().test_batching_equivalence() + # We will verify our results on an image of cute cats def prepare_img(): diff --git a/tests/models/pegasus/test_modeling_pegasus.py b/tests/models/pegasus/test_modeling_pegasus.py index f7de1258847d..2bd102b904e3 100644 --- a/tests/models/pegasus/test_modeling_pegasus.py +++ b/tests/models/pegasus/test_modeling_pegasus.py @@ -112,12 +112,6 @@ def __init__( self.pad_token_id = pad_token_id self.bos_token_id = bos_token_id - # forcing a certain token to be generated, sets all other tokens to -inf - # if however the token to be generated is already at -inf then it can lead token - # `nan` values and thus break generation - self.forced_bos_token_id = None - self.forced_eos_token_id = None - def prepare_config_and_inputs(self): input_ids = ids_tensor([self.batch_size, self.seq_length], self.vocab_size) input_ids = ids_tensor([self.batch_size, self.seq_length], self.vocab_size).clamp( @@ -165,8 +159,6 @@ def get_config(self): eos_token_id=self.eos_token_id, bos_token_id=self.bos_token_id, pad_token_id=self.pad_token_id, - forced_bos_token_id=self.forced_bos_token_id, - forced_eos_token_id=self.forced_eos_token_id, ) def prepare_config_and_inputs_for_common(self): diff --git a/tests/models/whisper/test_modeling_flax_whisper.py b/tests/models/whisper/test_modeling_flax_whisper.py index 4b8092e800ad..065c6536481d 100644 --- a/tests/models/whisper/test_modeling_flax_whisper.py +++ b/tests/models/whisper/test_modeling_flax_whisper.py @@ -84,7 +84,6 @@ def __init__( decoder_start_token_id=85, num_conv_layers=1, suppress_tokens=None, - begin_suppress_tokens=None, 
): self.parent = parent self.batch_size = batch_size @@ -118,7 +117,6 @@ def __init__( self.decoder_start_token_id = decoder_start_token_id self.num_conv_layers = num_conv_layers self.suppress_tokens = suppress_tokens - self.begin_suppress_tokens = begin_suppress_tokens def prepare_config_and_inputs_for_common(self): input_features = floats_tensor([self.batch_size, self.num_mel_bins, self.seq_length], self.vocab_size) @@ -147,7 +145,6 @@ def prepare_config_and_inputs_for_common(self): encoder_ffn_dim=self.encoder_ffn_dim, encoder_layers=self.encoder_layers, suppress_tokens=self.suppress_tokens, - begin_suppress_tokens=self.begin_suppress_tokens, ) inputs_dict = prepare_whisper_inputs_dict(config, input_features, decoder_input_ids) return config, inputs_dict @@ -741,7 +738,6 @@ def __init__( num_mel_bins=80, num_conv_layers=1, suppress_tokens=None, - begin_suppress_tokens=None, classifier_proj_size=4, num_labels=2, is_encoder_decoder=False, @@ -764,7 +760,6 @@ def __init__( self.max_source_positions = max_source_positions self.num_conv_layers = num_conv_layers self.suppress_tokens = suppress_tokens - self.begin_suppress_tokens = begin_suppress_tokens self.classifier_proj_size = classifier_proj_size self.num_labels = num_labels self.is_encoder_decoder = is_encoder_decoder @@ -785,7 +780,6 @@ def get_config(self): decoder_ffn_dim=self.hidden_size, encoder_ffn_dim=self.hidden_size, suppress_tokens=self.suppress_tokens, - begin_suppress_tokens=self.begin_suppress_tokens, classifier_proj_size=self.classifier_proj_size, num_labels=self.num_labels, is_encoder_decoder=self.is_encoder_decoder, diff --git a/tests/models/whisper/test_modeling_tf_whisper.py b/tests/models/whisper/test_modeling_tf_whisper.py index b200671e048f..be311486267d 100644 --- a/tests/models/whisper/test_modeling_tf_whisper.py +++ b/tests/models/whisper/test_modeling_tf_whisper.py @@ -104,7 +104,6 @@ def __init__( decoder_start_token_id=85, num_conv_layers=1, suppress_tokens=None, - 
begin_suppress_tokens=None, ): self.parent = parent self.batch_size = batch_size @@ -129,7 +128,6 @@ def __init__( self.decoder_start_token_id = decoder_start_token_id self.num_conv_layers = num_conv_layers self.suppress_tokens = suppress_tokens - self.begin_suppress_tokens = begin_suppress_tokens def prepare_config_and_inputs(self): input_features = floats_tensor([self.batch_size, self.num_mel_bins, self.seq_length], self.vocab_size) @@ -166,7 +164,6 @@ def get_config(self): encoder_ffn_dim=self.hidden_size, decoder_start_token_id=self.decoder_start_token_id, suppress_tokens=self.suppress_tokens, - begin_suppress_tokens=self.begin_suppress_tokens, ) def prepare_config_and_inputs_for_common(self): diff --git a/tests/models/whisper/test_modeling_whisper.py b/tests/models/whisper/test_modeling_whisper.py index 6deebf552b91..f3d191b4d3c4 100644 --- a/tests/models/whisper/test_modeling_whisper.py +++ b/tests/models/whisper/test_modeling_whisper.py @@ -218,7 +218,6 @@ def __init__( decoder_start_token_id=85, num_conv_layers=1, suppress_tokens=None, - begin_suppress_tokens=None, ): self.parent = parent self.batch_size = batch_size @@ -243,7 +242,6 @@ def __init__( self.decoder_start_token_id = decoder_start_token_id self.num_conv_layers = num_conv_layers self.suppress_tokens = suppress_tokens - self.begin_suppress_tokens = begin_suppress_tokens def prepare_config_and_inputs(self): input_features = floats_tensor([self.batch_size, self.num_mel_bins, self.seq_length], self.vocab_size) @@ -280,7 +278,6 @@ def get_config(self): encoder_ffn_dim=self.hidden_size, decoder_start_token_id=self.decoder_start_token_id, suppress_tokens=self.suppress_tokens, - begin_suppress_tokens=self.begin_suppress_tokens, ) def prepare_config_and_inputs_for_common(self): @@ -3309,7 +3306,6 @@ def __init__( num_mel_bins=80, num_conv_layers=1, suppress_tokens=None, - begin_suppress_tokens=None, classifier_proj_size=4, num_labels=2, is_encoder_decoder=False, @@ -3332,7 +3328,6 @@ def __init__( 
self.max_source_positions = max_source_positions self.num_conv_layers = num_conv_layers self.suppress_tokens = suppress_tokens - self.begin_suppress_tokens = begin_suppress_tokens self.classifier_proj_size = classifier_proj_size self.num_labels = num_labels self.is_encoder_decoder = is_encoder_decoder @@ -3353,7 +3348,6 @@ def get_config(self): decoder_ffn_dim=self.hidden_size, encoder_ffn_dim=self.hidden_size, suppress_tokens=self.suppress_tokens, - begin_suppress_tokens=self.begin_suppress_tokens, classifier_proj_size=self.classifier_proj_size, num_labels=self.num_labels, is_encoder_decoder=self.is_encoder_decoder, @@ -3685,7 +3679,6 @@ def __init__( decoder_start_token_id=85, num_conv_layers=1, suppress_tokens=None, - begin_suppress_tokens=None, ): self.parent = parent self.batch_size = batch_size @@ -3709,7 +3702,6 @@ def __init__( self.decoder_start_token_id = decoder_start_token_id self.num_conv_layers = num_conv_layers self.suppress_tokens = suppress_tokens - self.begin_suppress_tokens = begin_suppress_tokens def prepare_config_and_inputs(self): input_features = floats_tensor([self.batch_size, self.num_mel_bins, self.seq_length], self.vocab_size) @@ -3765,7 +3757,6 @@ def get_config(self): encoder_ffn_dim=self.hidden_size, decoder_start_token_id=self.decoder_start_token_id, suppress_tokens=self.suppress_tokens, - begin_suppress_tokens=self.begin_suppress_tokens, ) def prepare_config_and_inputs_for_common(self): diff --git a/tests/utils/test_cli.py b/tests/utils/test_cli.py index b208ff19f1a4..148f091c2794 100644 --- a/tests/utils/test_cli.py +++ b/tests/utils/test_cli.py @@ -18,7 +18,7 @@ import unittest from unittest.mock import patch -from transformers.testing_utils import CaptureStd, is_pt_tf_cross_test, require_torch +from transformers.testing_utils import CaptureStd, require_torch class CLITest(unittest.TestCase): @@ -33,18 +33,6 @@ def test_cli_env(self): self.assertIn("Platform", cs.out) self.assertIn("Using distributed or parallel set-up in script?", 
cs.out) - @is_pt_tf_cross_test - @patch( - "sys.argv", ["fakeprogrampath", "pt-to-tf", "--model-name", "hf-internal-testing/tiny-random-gptj", "--no-pr"] - ) - def test_cli_pt_to_tf(self): - import transformers.commands.transformers_cli - - shutil.rmtree("/tmp/hf-internal-testing/tiny-random-gptj", ignore_errors=True) # cleans potential past runs - transformers.commands.transformers_cli.main() - - self.assertTrue(os.path.exists("/tmp/hf-internal-testing/tiny-random-gptj/tf_model.h5")) - @require_torch @patch("sys.argv", ["fakeprogrampath", "download", "hf-internal-testing/tiny-random-gptj", "--cache-dir", "/tmp"]) def test_cli_download(self): diff --git a/tests/utils/test_configuration_utils.py b/tests/utils/test_configuration_utils.py index 934e11a78797..6b684867eb87 100644 --- a/tests/utils/test_configuration_utils.py +++ b/tests/utils/test_configuration_utils.py @@ -315,21 +315,19 @@ def test_repo_versioning_before(self): old_configuration = old_transformers.models.auto.AutoConfig.from_pretrained(repo) self.assertEqual(old_configuration.hidden_size, 768) - def test_saving_config_with_custom_generation_kwargs_raises_warning(self): + def test_saving_config_with_custom_generation_kwargs_raises_exception(self): config = BertConfig(min_length=3) # `min_length = 3` is a non-default generation kwarg with tempfile.TemporaryDirectory() as tmp_dir: - with self.assertLogs("transformers.configuration_utils", level="WARNING") as logs: + with self.assertRaises(ValueError): config.save_pretrained(tmp_dir) - self.assertEqual(len(logs.output), 1) - self.assertIn("min_length", logs.output[0]) - def test_has_non_default_generation_parameters(self): + def test_get_non_default_generation_parameters(self): config = BertConfig() - self.assertFalse(config._has_non_default_generation_parameters()) + self.assertFalse(len(config._get_non_default_generation_parameters()) > 0) config = BertConfig(min_length=3) - self.assertTrue(config._has_non_default_generation_parameters()) + 
self.assertTrue(len(config._get_non_default_generation_parameters()) > 0) config = BertConfig(min_length=0) # `min_length = 0` is a default generation kwarg - self.assertFalse(config._has_non_default_generation_parameters()) + self.assertFalse(len(config._get_non_default_generation_parameters()) > 0) def test_loading_config_do_not_raise_future_warnings(self): """Regression test for https://github.com/huggingface/transformers/issues/31002.""" diff --git a/tests/utils/test_modeling_utils.py b/tests/utils/test_modeling_utils.py index 5dec17f1f477..238a9a1fe4d6 100644 --- a/tests/utils/test_modeling_utils.py +++ b/tests/utils/test_modeling_utils.py @@ -23,6 +23,7 @@ import unittest import unittest.mock as mock import uuid +import warnings from pathlib import Path import requests @@ -1599,14 +1600,30 @@ def test_safetensors_torch_from_torch_sharded(self): for p1, p2 in zip(model.parameters(), new_model.parameters()): self.assertTrue(torch.equal(p1, p2)) - def test_modifying_model_config_causes_warning_saving_generation_config(self): + def test_modifying_model_config_gets_moved_to_generation_config(self): + """ + Calling `model.save_pretrained` should move the changes made to `generate` parameterization in the model config + to the generation config. + """ model = AutoModelForCausalLM.from_pretrained("openai-community/gpt2") - model.config.top_k = 1 - with tempfile.TemporaryDirectory() as tmp_dir: - with self.assertLogs("transformers.modeling_utils", level="WARNING") as logs: + # Initially, the repetition penalty has its default value in `model.config`. 
The `model.generation_config` will + # have the exact same default + self.assertTrue(model.config.repetition_penalty == 1.0) + self.assertTrue(model.generation_config.repetition_penalty == 1.0) + # If the user attempts to save a custom generation parameter: + model.config.repetition_penalty = 3.0 + with warnings.catch_warnings(record=True) as warning_list: + with tempfile.TemporaryDirectory() as tmp_dir: model.save_pretrained(tmp_dir) - self.assertEqual(len(logs.output), 1) - self.assertIn("Your generation config was originally created from the model config", logs.output[0]) + # 1 - That parameter will be removed from `model.config`. We don't want to use `model.config` to store + # generative parameters, and the old default (1.0) would no longer relect the user's wishes. + self.assertTrue(model.config.repetition_penalty is None) + # 2 - That parameter will be set in `model.generation_config` instead. + self.assertTrue(model.generation_config.repetition_penalty == 3.0) + # 3 - The user will see a warning regarding the custom parameter that has been moved. + self.assertTrue(len(warning_list) == 1) + self.assertTrue("Moving the following attributes" in str(warning_list[0].message)) + self.assertTrue("repetition_penalty" in str(warning_list[0].message)) @require_safetensors def test_model_from_pretrained_from_mlx(self):