From af3efdd71ee5b568dfdb71bb41d316df830147af Mon Sep 17 00:00:00 2001 From: Joao Gante Date: Tue, 13 Aug 2024 15:34:26 +0000 Subject: [PATCH 01/35] deprecated stuff --- docs/source/en/internal/generation_utils.md | 3 - docs/source/ja/internal/generation_utils.md | 3 - docs/source/zh/internal/generation_utils.md | 3 - src/transformers/__init__.py | 2 - src/transformers/commands/pt_to_tf.py | 303 +----------------- src/transformers/configuration_utils.py | 10 +- src/transformers/generation/__init__.py | 2 - src/transformers/generation/logits_process.py | 29 -- .../generation/stopping_criteria.py | 32 -- src/transformers/generation/utils.py | 20 +- src/transformers/modeling_utils.py | 18 -- .../models/bloom/modeling_bloom.py | 2 +- .../models/codegen/modeling_codegen.py | 2 +- src/transformers/models/dbrx/modeling_dbrx.py | 2 +- .../models/falcon/modeling_falcon.py | 2 +- .../models/gemma/modeling_gemma.py | 2 +- src/transformers/models/git/modeling_git.py | 2 +- .../models/gpt_neo/modeling_gpt_neo.py | 2 +- .../models/gpt_neox/modeling_gpt_neox.py | 2 +- src/transformers/models/gptj/modeling_gptj.py | 2 +- .../models/idefics/modeling_idefics.py | 2 +- .../models/llama/modeling_llama.py | 2 +- .../models/mistral/modeling_mistral.py | 2 +- .../models/mixtral/modeling_mixtral.py | 2 +- src/transformers/models/olmo/modeling_olmo.py | 2 +- .../models/persimmon/modeling_persimmon.py | 2 +- src/transformers/models/phi/modeling_phi.py | 2 +- src/transformers/models/phi3/modeling_phi3.py | 2 +- .../models/qwen2/modeling_qwen2.py | 2 +- .../models/qwen2_moe/modeling_qwen2_moe.py | 2 +- .../models/stablelm/modeling_stablelm.py | 2 +- .../models/starcoder2/modeling_starcoder2.py | 2 +- src/transformers/utils/dummy_pt_objects.py | 7 - tests/generation/test_stopping_criteria.py | 16 - 34 files changed, 34 insertions(+), 456 deletions(-) diff --git a/docs/source/en/internal/generation_utils.md b/docs/source/en/internal/generation_utils.md index 0221622c4080..936e4bfb95da 100644 
--- a/docs/source/en/internal/generation_utils.md +++ b/docs/source/en/internal/generation_utils.md @@ -140,9 +140,6 @@ generation. [[autodoc]] ForcedEOSTokenLogitsProcessor - __call__ -[[autodoc]] ForceTokensLogitsProcessor - - __call__ - [[autodoc]] HammingDiversityLogitsProcessor - __call__ diff --git a/docs/source/ja/internal/generation_utils.md b/docs/source/ja/internal/generation_utils.md index 9e3ce7799543..1a5cc1dec079 100644 --- a/docs/source/ja/internal/generation_utils.md +++ b/docs/source/ja/internal/generation_utils.md @@ -139,9 +139,6 @@ generation_output[:2] [[autodoc]] ForcedEOSTokenLogitsProcessor - __call__ -[[autodoc]] ForceTokensLogitsProcessor - - __call__ - [[autodoc]] HammingDiversityLogitsProcessor - __call__ diff --git a/docs/source/zh/internal/generation_utils.md b/docs/source/zh/internal/generation_utils.md index 75f28c233ee0..084e2a29dc8c 100644 --- a/docs/source/zh/internal/generation_utils.md +++ b/docs/source/zh/internal/generation_utils.md @@ -133,9 +133,6 @@ generation_output[:2] [[autodoc]] ForcedEOSTokenLogitsProcessor - __call__ -[[autodoc]] ForceTokensLogitsProcessor - - __call__ - [[autodoc]] HammingDiversityLogitsProcessor - __call__ diff --git a/src/transformers/__init__.py b/src/transformers/__init__.py index 1d36e7f8c746..ced2b9997366 100755 --- a/src/transformers/__init__.py +++ b/src/transformers/__init__.py @@ -1276,7 +1276,6 @@ "ExponentialDecayLengthPenalty", "ForcedBOSTokenLogitsProcessor", "ForcedEOSTokenLogitsProcessor", - "ForceTokensLogitsProcessor", "GenerationMixin", "HammingDiversityLogitsProcessor", "InfNanRemoveLogitsProcessor", @@ -6059,7 +6058,6 @@ ExponentialDecayLengthPenalty, ForcedBOSTokenLogitsProcessor, ForcedEOSTokenLogitsProcessor, - ForceTokensLogitsProcessor, GenerationMixin, HammingDiversityLogitsProcessor, InfNanRemoveLogitsProcessor, diff --git a/src/transformers/commands/pt_to_tf.py b/src/transformers/commands/pt_to_tf.py index 4002b5e0eb85..ad0dbd14e15b 100644 --- 
a/src/transformers/commands/pt_to_tf.py +++ b/src/transformers/commands/pt_to_tf.py @@ -12,45 +12,13 @@ # See the License for the specific language governing permissions and # limitations under the License. -import inspect import os from argparse import ArgumentParser, Namespace -from importlib import import_module -import huggingface_hub -import numpy as np -from packaging import version - -from .. import ( - FEATURE_EXTRACTOR_MAPPING, - IMAGE_PROCESSOR_MAPPING, - PROCESSOR_MAPPING, - TOKENIZER_MAPPING, - AutoConfig, - AutoFeatureExtractor, - AutoImageProcessor, - AutoProcessor, - AutoTokenizer, - is_datasets_available, - is_tf_available, - is_torch_available, -) -from ..utils import TF2_WEIGHTS_INDEX_NAME, TF2_WEIGHTS_NAME, logging +from ..utils import logging from . import BaseTransformersCLICommand -if is_tf_available(): - import tensorflow as tf - - tf.config.experimental.enable_tensor_float_32_execution(False) - -if is_torch_available(): - import torch - -if is_datasets_available(): - from datasets import load_dataset - - MAX_ERROR = 5e-5 # larger error tolerance than in our internal tests, to avoid flaky user-facing errors @@ -136,44 +104,6 @@ def register_subcommand(parser: ArgumentParser): ) train_parser.set_defaults(func=convert_command_factory) - @staticmethod - def find_pt_tf_differences(pt_outputs, tf_outputs): - """ - Compares the TensorFlow and PyTorch outputs, returning a dictionary with all tensor differences. - """ - # 1. All output attributes must be the same - pt_out_attrs = set(pt_outputs.keys()) - tf_out_attrs = set(tf_outputs.keys()) - if pt_out_attrs != tf_out_attrs: - raise ValueError( - f"The model outputs have different attributes, aborting. (Pytorch: {pt_out_attrs}, TensorFlow:" - f" {tf_out_attrs})" - ) - - # 2. For each output attribute, computes the difference - def _find_pt_tf_differences(pt_out, tf_out, differences, attr_name=""): - # If the current attribute is a tensor, it is a leaf and we make the comparison. 
Otherwise, we will dig in - # recursivelly, keeping the name of the attribute. - if isinstance(pt_out, torch.Tensor): - tensor_difference = np.max(np.abs(pt_out.numpy() - tf_out.numpy())) - differences[attr_name] = tensor_difference - else: - root_name = attr_name - for i, pt_item in enumerate(pt_out): - # If it is a named attribute, we keep the name. Otherwise, just its index. - if isinstance(pt_item, str): - branch_name = root_name + pt_item - tf_item = tf_out[pt_item] - pt_item = pt_out[pt_item] - else: - branch_name = root_name + f"[{i}]" - tf_item = tf_out[i] - differences = _find_pt_tf_differences(pt_item, tf_item, differences, branch_name) - - return differences - - return _find_pt_tf_differences(pt_outputs, tf_outputs, {}) - def __init__( self, model_name: str, @@ -196,237 +126,12 @@ def __init__( self._extra_commit_description = extra_commit_description self._override_model_class = override_model_class - def get_inputs(self, pt_model, tf_dummy_inputs, config): - """ - Returns the right inputs for the model, based on its signature. 
- """ - - def _get_audio_input(): - ds = load_dataset("hf-internal-testing/librispeech_asr_dummy", "clean", split="validation") - speech_samples = ds.sort("id").select(range(2))[:2]["audio"] - raw_samples = [x["array"] for x in speech_samples] - return raw_samples - - model_config_class = type(pt_model.config) - if model_config_class in PROCESSOR_MAPPING: - processor = AutoProcessor.from_pretrained(self._local_dir) - if model_config_class in TOKENIZER_MAPPING and processor.tokenizer.pad_token is None: - processor.tokenizer.pad_token = processor.tokenizer.eos_token - elif model_config_class in IMAGE_PROCESSOR_MAPPING: - processor = AutoImageProcessor.from_pretrained(self._local_dir) - elif model_config_class in FEATURE_EXTRACTOR_MAPPING: - processor = AutoFeatureExtractor.from_pretrained(self._local_dir) - elif model_config_class in TOKENIZER_MAPPING: - processor = AutoTokenizer.from_pretrained(self._local_dir) - if processor.pad_token is None: - processor.pad_token = processor.eos_token - else: - raise ValueError(f"Unknown data processing type (model config type: {model_config_class})") - - model_forward_signature = set(inspect.signature(pt_model.forward).parameters.keys()) - processor_inputs = {} - if "input_ids" in model_forward_signature: - processor_inputs.update( - { - "text": ["Hi there!", "I am a batch with more than one row and different input lengths."], - "padding": True, - "truncation": True, - } - ) - if "pixel_values" in model_forward_signature: - sample_images = load_dataset("uoft-cs/cifar10", "plain_text", split="test")[:2]["img"] # no-script - processor_inputs.update({"images": sample_images}) - if "input_features" in model_forward_signature: - feature_extractor_signature = inspect.signature(processor.feature_extractor).parameters - # Pad to the largest input length by default but take feature extractor default - # padding value if it exists e.g. 
"max_length" and is not False or None - if "padding" in feature_extractor_signature: - default_strategy = feature_extractor_signature["padding"].default - if default_strategy is not False and default_strategy is not None: - padding_strategy = default_strategy - else: - padding_strategy = True - else: - padding_strategy = True - processor_inputs.update({"audio": _get_audio_input(), "padding": padding_strategy}) - if "input_values" in model_forward_signature: # Wav2Vec2 audio input - processor_inputs.update({"audio": _get_audio_input(), "padding": True}) - pt_input = processor(**processor_inputs, return_tensors="pt") - tf_input = processor(**processor_inputs, return_tensors="tf") - - # Extra input requirements, in addition to the input modality - if ( - config.is_encoder_decoder - or (hasattr(pt_model, "encoder") and hasattr(pt_model, "decoder")) - or "decoder_input_ids" in tf_dummy_inputs - ): - decoder_input_ids = np.asarray([[1], [1]], dtype=int) * (pt_model.config.decoder_start_token_id or 0) - pt_input.update({"decoder_input_ids": torch.tensor(decoder_input_ids)}) - tf_input.update({"decoder_input_ids": tf.convert_to_tensor(decoder_input_ids)}) - - return pt_input, tf_input - def run(self): - self._logger.warning( - "\n\nConverting PyTorch weights to TensorFlow is deprecated and will be removed in v4.43. " + # TODO (joao): delete file in v4.47 + raise NotImplementedError( + "\n\nConverting PyTorch weights to TensorFlow weights was removed in v4.43. " "Instead, we recommend that you convert PyTorch weights to Safetensors, an improved " "format that can be loaded by any framework, including TensorFlow. For more information, " "please see the Safetensors conversion guide: " "https://huggingface.co/docs/safetensors/en/convert-weights\n\n" ) - # hub version 0.9.0 introduced the possibility of programmatically opening PRs with normal write tokens. 
- if version.parse(huggingface_hub.__version__) < version.parse("0.9.0"): - raise ImportError( - "The huggingface_hub version must be >= 0.9.0 to use this command. Please update your huggingface_hub" - " installation." - ) - else: - from huggingface_hub import Repository, create_commit - from huggingface_hub._commit_api import CommitOperationAdd - - # Fetch remote data - repo = Repository(local_dir=self._local_dir, clone_from=self._model_name) - - # Load config and get the appropriate architecture -- the latter is needed to convert the head's weights - config = AutoConfig.from_pretrained(self._local_dir) - architectures = config.architectures - if self._override_model_class is not None: - if self._override_model_class.startswith("TF"): - architectures = [self._override_model_class[2:]] - else: - architectures = [self._override_model_class] - try: - pt_class = getattr(import_module("transformers"), architectures[0]) - except AttributeError: - raise ValueError(f"Model class {self._override_model_class} not found in transformers.") - try: - tf_class = getattr(import_module("transformers"), "TF" + architectures[0]) - except AttributeError: - raise ValueError(f"TF model class TF{self._override_model_class} not found in transformers.") - elif architectures is None: # No architecture defined -- use auto classes - pt_class = getattr(import_module("transformers"), "AutoModel") - tf_class = getattr(import_module("transformers"), "TFAutoModel") - self._logger.warning("No detected architecture, using AutoModel/TFAutoModel") - else: # Architecture defined -- use it - if len(architectures) > 1: - raise ValueError(f"More than one architecture was found, aborting. 
(architectures = {architectures})") - self._logger.warning(f"Detected architecture: {architectures[0]}") - pt_class = getattr(import_module("transformers"), architectures[0]) - try: - tf_class = getattr(import_module("transformers"), "TF" + architectures[0]) - except AttributeError: - raise AttributeError(f"The TensorFlow equivalent of {architectures[0]} doesn't exist in transformers.") - - # Check the TF dummy inputs to see what keys we need in the forward pass - tf_from_pt_model = tf_class.from_config(config) - tf_dummy_inputs = tf_from_pt_model.dummy_inputs - - del tf_from_pt_model # Try to keep only one model in memory at a time - - # Load the model and get some basic inputs - pt_model = pt_class.from_pretrained(self._local_dir) - pt_model.eval() - - pt_input, tf_input = self.get_inputs(pt_model, tf_dummy_inputs, config) - - with torch.no_grad(): - pt_outputs = pt_model(**pt_input, output_hidden_states=True) - del pt_model # will no longer be used, and may have a large memory footprint - - tf_from_pt_model = tf_class.from_pretrained(self._local_dir, from_pt=True) - tf_from_pt_outputs = tf_from_pt_model(**tf_input, output_hidden_states=True, training=False) - - # Confirms that cross loading PT weights into TF worked. - crossload_differences = self.find_pt_tf_differences(pt_outputs, tf_from_pt_outputs) - output_differences = {k: v for k, v in crossload_differences.items() if "hidden" not in k} - hidden_differences = {k: v for k, v in crossload_differences.items() if "hidden" in k} - if len(output_differences) == 0 and architectures is not None: - raise ValueError( - f"Something went wrong -- the config file has architectures ({architectures}), but no model head" - " output was found. 
All outputs start with 'hidden'" - ) - max_crossload_output_diff = max(output_differences.values()) if output_differences else 0.0 - max_crossload_hidden_diff = max(hidden_differences.values()) - if max_crossload_output_diff > self._max_error or max_crossload_hidden_diff > self._max_error: - raise ValueError( - "The cross-loaded TensorFlow model has different outputs, something went wrong!\n" - + f"\nList of maximum output differences above the threshold ({self._max_error}):\n" - + "\n".join([f"{k}: {v:.3e}" for k, v in output_differences.items() if v > self._max_error]) - + f"\n\nList of maximum hidden layer differences above the threshold ({self._max_error}):\n" - + "\n".join([f"{k}: {v:.3e}" for k, v in hidden_differences.items() if v > self._max_error]) - ) - - # Save the weights in a TF format (if needed) and confirms that the results are still good - tf_weights_path = os.path.join(self._local_dir, TF2_WEIGHTS_NAME) - tf_weights_index_path = os.path.join(self._local_dir, TF2_WEIGHTS_INDEX_NAME) - if (not os.path.exists(tf_weights_path) and not os.path.exists(tf_weights_index_path)) or self._new_weights: - tf_from_pt_model.save_pretrained(self._local_dir) - del tf_from_pt_model # will no longer be used, and may have a large memory footprint - - tf_model = tf_class.from_pretrained(self._local_dir) - tf_outputs = tf_model(**tf_input, output_hidden_states=True) - - conversion_differences = self.find_pt_tf_differences(pt_outputs, tf_outputs) - output_differences = {k: v for k, v in conversion_differences.items() if "hidden" not in k} - hidden_differences = {k: v for k, v in conversion_differences.items() if "hidden" in k} - if len(output_differences) == 0 and architectures is not None: - raise ValueError( - f"Something went wrong -- the config file has architectures ({architectures}), but no model head" - " output was found. 
All outputs start with 'hidden'" - ) - max_conversion_output_diff = max(output_differences.values()) if output_differences else 0.0 - max_conversion_hidden_diff = max(hidden_differences.values()) - if max_conversion_output_diff > self._max_error or max_conversion_hidden_diff > self._max_error: - raise ValueError( - "The converted TensorFlow model has different outputs, something went wrong!\n" - + f"\nList of maximum output differences above the threshold ({self._max_error}):\n" - + "\n".join([f"{k}: {v:.3e}" for k, v in output_differences.items() if v > self._max_error]) - + f"\n\nList of maximum hidden layer differences above the threshold ({self._max_error}):\n" - + "\n".join([f"{k}: {v:.3e}" for k, v in hidden_differences.items() if v > self._max_error]) - ) - - commit_message = "Update TF weights" if self._new_weights else "Add TF weights" - if self._push: - repo.git_add(auto_lfs_track=True) - repo.git_commit(commit_message) - repo.git_push(blocking=True) # this prints a progress bar with the upload - self._logger.warning(f"TF weights pushed into {self._model_name}") - elif not self._no_pr: - self._logger.warning("Uploading the weights into a new PR...") - commit_descrition = ( - "Model converted by the [`transformers`' `pt_to_tf`" - " CLI](https://github.com/huggingface/transformers/blob/main/src/transformers/commands/pt_to_tf.py). " - "All converted model outputs and hidden layers were validated against its PyTorch counterpart.\n\n" - f"Maximum crossload output difference={max_crossload_output_diff:.3e}; " - f"Maximum crossload hidden layer difference={max_crossload_hidden_diff:.3e};\n" - f"Maximum conversion output difference={max_conversion_output_diff:.3e}; " - f"Maximum conversion hidden layer difference={max_conversion_hidden_diff:.3e};\n" - ) - if self._max_error > MAX_ERROR: - commit_descrition += ( - f"\n\nCAUTION: The maximum admissible error was manually increased to {self._max_error}!" 
- ) - if self._extra_commit_description: - commit_descrition += "\n\n" + self._extra_commit_description - - # sharded model -> adds all related files (index and .h5 shards) - if os.path.exists(tf_weights_index_path): - operations = [ - CommitOperationAdd(path_in_repo=TF2_WEIGHTS_INDEX_NAME, path_or_fileobj=tf_weights_index_path) - ] - for shard_path in tf.io.gfile.glob(self._local_dir + "/tf_model-*.h5"): - operations += [ - CommitOperationAdd(path_in_repo=os.path.basename(shard_path), path_or_fileobj=shard_path) - ] - else: - operations = [CommitOperationAdd(path_in_repo=TF2_WEIGHTS_NAME, path_or_fileobj=tf_weights_path)] - - hub_pr_url = create_commit( - repo_id=self._model_name, - operations=operations, - commit_message=commit_message, - commit_description=commit_descrition, - repo_type="model", - create_pr=True, - ).pr_url - self._logger.warning(f"PR open in {hub_pr_url}") diff --git a/src/transformers/configuration_utils.py b/src/transformers/configuration_utils.py index 2f84bc29aee2..b12d6a43a671 100755 --- a/src/transformers/configuration_utils.py +++ b/src/transformers/configuration_utils.py @@ -445,11 +445,11 @@ def save_pretrained(self, save_directory: Union[str, os.PathLike], push_to_hub: if hasattr(self, parameter_name) and getattr(self, parameter_name) != default_value: non_default_generation_parameters[parameter_name] = getattr(self, parameter_name) if len(non_default_generation_parameters) > 0: - logger.warning( - "Some non-default generation parameters are set in the model config. These should go into a " - "GenerationConfig file (https://huggingface.co/docs/transformers/generation_strategies#save-a-custom-decoding-strategy-with-your-model) " - "instead. This warning will be raised to an exception in v4.41.\n" - f"Non-default generation parameters: {str(non_default_generation_parameters)}" + raise ValueError( + "Some non-default generation parameters are set in the model config. 
These should go into either a) " + "`model.generation_config` (as opposed to `model.config`); OR b) a GenerationConfig file " + "(https://huggingface.co/docs/transformers/generation_strategies#save-a-custom-decoding-strategy-with-your-model) " + f"\nNon-default generation parameters: {str(non_default_generation_parameters)}" ) os.makedirs(save_directory, exist_ok=True) diff --git a/src/transformers/generation/__init__.py b/src/transformers/generation/__init__.py index 6880321d6326..faf5266b84ae 100644 --- a/src/transformers/generation/__init__.py +++ b/src/transformers/generation/__init__.py @@ -55,7 +55,6 @@ "ExponentialDecayLengthPenalty", "ForcedBOSTokenLogitsProcessor", "ForcedEOSTokenLogitsProcessor", - "ForceTokensLogitsProcessor", "HammingDiversityLogitsProcessor", "InfNanRemoveLogitsProcessor", "LogitNormalization", @@ -201,7 +200,6 @@ ExponentialDecayLengthPenalty, ForcedBOSTokenLogitsProcessor, ForcedEOSTokenLogitsProcessor, - ForceTokensLogitsProcessor, HammingDiversityLogitsProcessor, InfNanRemoveLogitsProcessor, LogitNormalization, diff --git a/src/transformers/generation/logits_process.py b/src/transformers/generation/logits_process.py index 7f89e239245b..e9ba45606829 100644 --- a/src/transformers/generation/logits_process.py +++ b/src/transformers/generation/logits_process.py @@ -15,7 +15,6 @@ import inspect import math -import warnings from typing import Callable, Dict, Iterable, List, Optional, Tuple, Union import numpy as np @@ -1844,34 +1843,6 @@ def __call__(self, input_ids: torch.LongTensor, scores: torch.FloatTensor) -> to return scores -class ForceTokensLogitsProcessor(LogitsProcessor): - r""" - This processor takes a list of pairs of integers which indicates a mapping from generation indices to token - indices that will be forced before generation. The processor will set their log probs to `inf` so that they are - sampled at their corresponding index. 
Originally created for - [Whisper](https://huggingface.co/docs/transformers/model_doc/whisper). - """ - - def __init__(self, force_token_map: List[List[int]], _has_warned: Optional[bool] = False): - self.force_token_map = dict(force_token_map) - if not _has_warned: - # TODO(Sanchit): remove this processor entirely in v4.40 - warnings.warn( - "This `ForceTokensLogitsProcessor` has been deprecated and will be removed in v4.40. Should you need to provide prompt ids for generation, specify `input_ids` to the generate method for decoder-only models, or `decoder_input_ids` for encoder-decoder models.", - FutureWarning, - ) - - @add_start_docstrings(LOGITS_PROCESSOR_INPUTS_DOCSTRING) - def __call__(self, input_ids: torch.LongTensor, scores: torch.FloatTensor) -> torch.FloatTensor: - generation_idx = input_ids.shape[-1] - current_token = self.force_token_map.get(generation_idx, None) - scores_processed = scores - if current_token is not None: - scores_processed = torch.full_like(scores, -float("inf")) - scores_processed[:, current_token] = 0 - return scores_processed - - class WhisperTimeStampLogitsProcessor(LogitsProcessor): r""" diff --git a/src/transformers/generation/stopping_criteria.py b/src/transformers/generation/stopping_criteria.py index f8e94f6f86a0..961b6d6f5e43 100644 --- a/src/transformers/generation/stopping_criteria.py +++ b/src/transformers/generation/stopping_criteria.py @@ -85,36 +85,6 @@ def __call__(self, input_ids: torch.LongTensor, scores: torch.FloatTensor, **kwa return torch.full((input_ids.shape[0],), is_done, device=input_ids.device, dtype=torch.bool) -class MaxNewTokensCriteria(StoppingCriteria): - """ - This class can be used to stop generation whenever the generated number of tokens exceeds `max_new_tokens`. Keep in - mind for decoder-only type of transformers, this will **not** include the initial prompted tokens. This is very - close to `MaxLengthCriteria` but ignores the number of initial tokens. 
- - Args: - start_length (`int`): - The number of initial tokens. - max_new_tokens (`int`): - The maximum number of tokens to generate. - """ - - def __init__(self, start_length: int, max_new_tokens: int): - warnings.warn( - "The class `MaxNewTokensCriteria` is deprecated and will be removed in v4.43. " - f"Please use `MaxLengthCriteria(max_length={start_length + max_new_tokens})` " - "with `max_length = start_length + max_new_tokens` instead.", - FutureWarning, - ) - self.start_length = start_length - self.max_new_tokens = max_new_tokens - self.max_length = start_length + max_new_tokens - - @add_start_docstrings(STOPPING_CRITERIA_INPUTS_DOCSTRING) - def __call__(self, input_ids: torch.LongTensor, scores: torch.FloatTensor, **kwargs) -> torch.BoolTensor: - is_done = input_ids.shape[-1] >= self.max_length - return torch.full((input_ids.shape[0],), is_done, device=input_ids.device, dtype=torch.bool) - - class MaxTimeCriteria(StoppingCriteria): """ This class can be used to stop generation whenever the full generation exceeds some amount of time. 
By default, the @@ -516,8 +486,6 @@ def max_length(self) -> Optional[int]: for stopping_criterium in self: if isinstance(stopping_criterium, MaxLengthCriteria): return stopping_criterium.max_length - elif isinstance(stopping_criterium, MaxNewTokensCriteria): - return stopping_criterium.max_length return None diff --git a/src/transformers/generation/utils.py b/src/transformers/generation/utils.py index aee9dc4f55e7..5e93a115acd5 100644 --- a/src/transformers/generation/utils.py +++ b/src/transformers/generation/utils.py @@ -76,7 +76,6 @@ ExponentialDecayLengthPenalty, ForcedBOSTokenLogitsProcessor, ForcedEOSTokenLogitsProcessor, - ForceTokensLogitsProcessor, HammingDiversityLogitsProcessor, InfNanRemoveLogitsProcessor, LogitNormalization, @@ -876,9 +875,6 @@ def _get_logits_processor( if (input_ids_seq_length > 1 or generation_config.forced_bos_token_id is None) else begin_index + 1 ) - if generation_config.forced_decoder_ids is not None: - # generation starts after the last token that is forced - begin_index += generation_config.forced_decoder_ids[-1][0] processors.append( SuppressTokensAtBeginLogitsProcessor( generation_config.begin_suppress_tokens, @@ -887,12 +883,11 @@ def _get_logits_processor( ) ) if generation_config.forced_decoder_ids is not None: - # TODO(Sanchit): deprecate in v4.40 by removing this logic - warnings.warn( - "You have explicitly specified `forced_decoder_ids`. This functionality has been deprecated and will throw an error in v4.40. Please remove the `forced_decoder_ids` argument in favour of `input_ids` or `decoder_input_ids` respectively.", - FutureWarning, + # TODO (sanchit): move this exception to GenerationConfig.validate() when TF & FLAX are aligned with PT + raise ValueError( + "You have explicitly specified `forced_decoder_ids`. 
Please remove the `forced_decoder_ids` argument " + "in favour of `input_ids` or `decoder_input_ids` respectively.", ) - processors.append(ForceTokensLogitsProcessor(generation_config.forced_decoder_ids, _has_warned=True)) if generation_config.watermarking_config is not None: processors.append( WatermarkLogitsProcessor( @@ -2234,13 +2229,6 @@ def heal_tokens( return input_ids - def contrastive_search(self, *args, **kwargs): - logger.warning_once( - "Calling `contrastive_search` directly is deprecated and will be removed in v4.41. Use `generate` or a " - "custom generation loop instead.", - ) - return self._contrastive_search(*args, **kwargs) - def _dola_decoding( self, input_ids: torch.LongTensor, diff --git a/src/transformers/modeling_utils.py b/src/transformers/modeling_utils.py index b92d4b447f19..f4d6e4e6ba8e 100755 --- a/src/transformers/modeling_utils.py +++ b/src/transformers/modeling_utils.py @@ -2573,24 +2573,6 @@ def save_pretrained( if not _hf_peft_config_loaded: model_to_save.config.save_pretrained(save_directory) if self.can_generate(): - # generation config built from the model config + the model config holds generation kwargs -> generate - # may revert to legacy behavior if the two don't match - if ( - model_to_save.generation_config._from_model_config - and model_to_save.config._has_non_default_generation_parameters() - ): - new_generation_config = GenerationConfig.from_model_config(model_to_save.config) - if new_generation_config != model_to_save.generation_config: - logger.warning( - "Your generation config was originally created from the model config, but the model " - "config has changed since then. Unless you pass the `generation_config` argument to this " - "model's `generate` calls, they will revert to the legacy behavior where the base " - "`generate` parameterization is loaded from the model config instead. 
" - "To avoid this behavior and this warning, we recommend you to overwrite the generation " - "config model attribute before calling the model's `save_pretrained`, preferably also " - "removing any generation kwargs from the model config. This warning will be raised to an " - "exception in v4.41." - ) model_to_save.generation_config.save_pretrained(save_directory) if _hf_peft_config_loaded: diff --git a/src/transformers/models/bloom/modeling_bloom.py b/src/transformers/models/bloom/modeling_bloom.py index c1caae6c6857..e365744f8b9e 100644 --- a/src/transformers/models/bloom/modeling_bloom.py +++ b/src/transformers/models/bloom/modeling_bloom.py @@ -693,7 +693,7 @@ def forward( past_key_values = DynamicCache.from_legacy_cache(past_key_values) logger.warning_once( "Using `past_key_values` as a tuple is deprecated and will be removed in v4.45. " - "Please use an appropriate `Cache` class (https://huggingface.co/docs/transformers/v4.41.3/en/internal/generation_utils#transformers.Cache)" + "Please use an appropriate `Cache` class (https://huggingface.co/docs/transformers/internal/generation_utils#transformers.Cache)" ) batch_size, seq_length, _ = inputs_embeds.shape diff --git a/src/transformers/models/codegen/modeling_codegen.py b/src/transformers/models/codegen/modeling_codegen.py index 1920f350f559..e668a0dc0631 100644 --- a/src/transformers/models/codegen/modeling_codegen.py +++ b/src/transformers/models/codegen/modeling_codegen.py @@ -532,7 +532,7 @@ def forward( if not self.training: logger.warning_once( "We detected that you are passing `past_key_values` as a tuple and this is deprecated and will be removed in v4.45. 
" - "Please use an appropriate `Cache` class (https://huggingface.co/docs/transformers/v4.41.3/en/internal/generation_utils#transformers.Cache)" + "Please use an appropriate `Cache` class (https://huggingface.co/docs/transformers/internal/generation_utils#transformers.Cache)" ) seq_length = inputs_embeds.shape[1] diff --git a/src/transformers/models/dbrx/modeling_dbrx.py b/src/transformers/models/dbrx/modeling_dbrx.py index 3486d5ed3ab0..1d8f011c3f6a 100644 --- a/src/transformers/models/dbrx/modeling_dbrx.py +++ b/src/transformers/models/dbrx/modeling_dbrx.py @@ -1066,7 +1066,7 @@ def forward( past_key_values = DynamicCache.from_legacy_cache(past_key_values) logger.warning_once( "We detected that you are passing `past_key_values` as a tuple and this is deprecated and will be removed in v4.43. " - "Please use an appropriate `Cache` class (https://huggingface.co/docs/transformers/v4.41.3/en/internal/generation_utils#transformers.Cache)" + "Please use an appropriate `Cache` class (https://huggingface.co/docs/transformers/internal/generation_utils#transformers.Cache)" ) if cache_position is None: diff --git a/src/transformers/models/falcon/modeling_falcon.py b/src/transformers/models/falcon/modeling_falcon.py index edaef78f9286..a9acd171c3ae 100644 --- a/src/transformers/models/falcon/modeling_falcon.py +++ b/src/transformers/models/falcon/modeling_falcon.py @@ -1029,7 +1029,7 @@ def forward( if not self.training: logger.warning_once( "We detected that you are passing `past_key_values` as a tuple and this is deprecated and will be removed in v4.45. 
" - "Please use an appropriate `Cache` class (https://huggingface.co/docs/transformers/v4.41.3/en/internal/generation_utils#transformers.Cache)" + "Please use an appropriate `Cache` class (https://huggingface.co/docs/transformers/internal/generation_utils#transformers.Cache)" ) alibi = None diff --git a/src/transformers/models/gemma/modeling_gemma.py b/src/transformers/models/gemma/modeling_gemma.py index a05d2c059e21..c648dee8fb56 100644 --- a/src/transformers/models/gemma/modeling_gemma.py +++ b/src/transformers/models/gemma/modeling_gemma.py @@ -861,7 +861,7 @@ def forward( past_key_values = DynamicCache.from_legacy_cache(past_key_values) logger.warning_once( "We detected that you are passing `past_key_values` as a tuple and this is deprecated and will be removed in v4.43. " - "Please use an appropriate `Cache` class (https://huggingface.co/docs/transformers/v4.41.3/en/internal/generation_utils#transformers.Cache)" + "Please use an appropriate `Cache` class (https://huggingface.co/docs/transformers/internal/generation_utils#transformers.Cache)" ) # decoder layers diff --git a/src/transformers/models/git/modeling_git.py b/src/transformers/models/git/modeling_git.py index 581f2b3947b4..4807289c927c 100644 --- a/src/transformers/models/git/modeling_git.py +++ b/src/transformers/models/git/modeling_git.py @@ -423,7 +423,7 @@ def forward( past_key_values = DynamicCache.from_legacy_cache(past_key_values) logger.warning_once( "We detected that you are passing `past_key_values` as a tuple and this is deprecated and will be removed in v4.45. 
" - "Please use an appropriate `Cache` class (https://huggingface.co/docs/transformers/v4.41.3/en/internal/generation_utils#transformers.Cache)" + "Please use an appropriate `Cache` class (https://huggingface.co/docs/transformers/internal/generation_utils#transformers.Cache)" ) all_hidden_states = () if output_hidden_states else None diff --git a/src/transformers/models/gpt_neo/modeling_gpt_neo.py b/src/transformers/models/gpt_neo/modeling_gpt_neo.py index 3a606c37b31c..65144ad0c0f1 100755 --- a/src/transformers/models/gpt_neo/modeling_gpt_neo.py +++ b/src/transformers/models/gpt_neo/modeling_gpt_neo.py @@ -747,7 +747,7 @@ def forward( if not self.training: logger.warning_once( "We detected that you are passing `past_key_values` as a tuple and this is deprecated and will be removed in v4.45. " - "Please use an appropriate `Cache` class (https://huggingface.co/docs/transformers/v4.41.3/en/internal/generation_utils#transformers.Cache)" + "Please use an appropriate `Cache` class (https://huggingface.co/docs/transformers/internal/generation_utils#transformers.Cache)" ) seq_length = inputs_embeds.shape[1] diff --git a/src/transformers/models/gpt_neox/modeling_gpt_neox.py b/src/transformers/models/gpt_neox/modeling_gpt_neox.py index 22fbb0429f59..5d21f2d2a725 100755 --- a/src/transformers/models/gpt_neox/modeling_gpt_neox.py +++ b/src/transformers/models/gpt_neox/modeling_gpt_neox.py @@ -928,7 +928,7 @@ def forward( if not self.training: logger.warning_once( "We detected that you are passing `past_key_values` as a tuple and this is deprecated and will be removed in v4.45. 
" - "Please use an appropriate `Cache` class (https://huggingface.co/docs/transformers/v4.41.3/en/internal/generation_utils#transformers.Cache)" + "Please use an appropriate `Cache` class (https://huggingface.co/docs/transformers/internal/generation_utils#transformers.Cache)" ) seq_length = inputs_embeds.shape[1] diff --git a/src/transformers/models/gptj/modeling_gptj.py b/src/transformers/models/gptj/modeling_gptj.py index 82540fe98ec7..ba0f319791e4 100644 --- a/src/transformers/models/gptj/modeling_gptj.py +++ b/src/transformers/models/gptj/modeling_gptj.py @@ -819,7 +819,7 @@ def forward( if not self.training: logger.warning_once( "We detected that you are passing `past_key_values` as a tuple and this is deprecated and will be removed in v4.45. " - "Please use an appropriate `Cache` class (https://huggingface.co/docs/transformers/v4.41.3/en/internal/generation_utils#transformers.Cache)" + "Please use an appropriate `Cache` class (https://huggingface.co/docs/transformers/internal/generation_utils#transformers.Cache)" ) seq_length = inputs_embeds.shape[1] diff --git a/src/transformers/models/idefics/modeling_idefics.py b/src/transformers/models/idefics/modeling_idefics.py index 3532219f3d6c..b4c24a46bb68 100644 --- a/src/transformers/models/idefics/modeling_idefics.py +++ b/src/transformers/models/idefics/modeling_idefics.py @@ -1243,7 +1243,7 @@ def forward( if not self.training: logger.warning_once( "We detected that you are passing `past_key_values` as a tuple and this is deprecated and will be removed in v4.45. 
" - "Please use an appropriate `Cache` class (https://huggingface.co/docs/transformers/v4.41.3/en/internal/generation_utils#transformers.Cache)" + "Please use an appropriate `Cache` class (https://huggingface.co/docs/transformers/internal/generation_utils#transformers.Cache)" ) return_legacy_cache = True past_key_values = DynamicCache.from_legacy_cache(past_key_values) diff --git a/src/transformers/models/llama/modeling_llama.py b/src/transformers/models/llama/modeling_llama.py index 8716d27f5481..293ce1dd7f6b 100644 --- a/src/transformers/models/llama/modeling_llama.py +++ b/src/transformers/models/llama/modeling_llama.py @@ -950,7 +950,7 @@ def forward( past_key_values = DynamicCache.from_legacy_cache(past_key_values) logger.warning_once( "We detected that you are passing `past_key_values` as a tuple and this is deprecated and will be removed in v4.43. " - "Please use an appropriate `Cache` class (https://huggingface.co/docs/transformers/v4.41.3/en/internal/generation_utils#transformers.Cache)" + "Please use an appropriate `Cache` class (https://huggingface.co/docs/transformers/internal/generation_utils#transformers.Cache)" ) if cache_position is None: diff --git a/src/transformers/models/mistral/modeling_mistral.py b/src/transformers/models/mistral/modeling_mistral.py index 1a2b732e85e4..7ee8a12e74cb 100644 --- a/src/transformers/models/mistral/modeling_mistral.py +++ b/src/transformers/models/mistral/modeling_mistral.py @@ -766,7 +766,7 @@ def forward( return_legacy_cache = True logger.warning_once( "We detected that you are passing `past_key_values` as a tuple and this is deprecated and will be removed in v4.43. 
" - "Please use an appropriate `Cache` class (https://huggingface.co/docs/transformers/v4.41.3/en/internal/generation_utils#transformers.Cache)" + "Please use an appropriate `Cache` class (https://huggingface.co/docs/transformers/internal/generation_utils#transformers.Cache)" ) if cache_position is None: diff --git a/src/transformers/models/mixtral/modeling_mixtral.py b/src/transformers/models/mixtral/modeling_mixtral.py index 522b6db7bcc7..d9ca3be7b0d4 100644 --- a/src/transformers/models/mixtral/modeling_mixtral.py +++ b/src/transformers/models/mixtral/modeling_mixtral.py @@ -1022,7 +1022,7 @@ def forward( past_key_values = DynamicCache.from_legacy_cache(past_key_values) logger.warning_once( "We detected that you are passing `past_key_values` as a tuple and this is deprecated and will be removed in v4.43. " - "Please use an appropriate `Cache` class (https://huggingface.co/docs/transformers/v4.41.3/en/internal/generation_utils#transformers.Cache)" + "Please use an appropriate `Cache` class (https://huggingface.co/docs/transformers/internal/generation_utils#transformers.Cache)" ) if inputs_embeds is None: diff --git a/src/transformers/models/olmo/modeling_olmo.py b/src/transformers/models/olmo/modeling_olmo.py index 1940660f61b5..c185112f318c 100644 --- a/src/transformers/models/olmo/modeling_olmo.py +++ b/src/transformers/models/olmo/modeling_olmo.py @@ -872,7 +872,7 @@ def forward( past_key_values = DynamicCache.from_legacy_cache(past_key_values) logger.warning_once( "We detected that you are passing `past_key_values` as a tuple and this is deprecated and will be removed in v4.43. 
" - "Please use an appropriate `Cache` class (https://huggingface.co/docs/transformers/v4.41.3/en/internal/generation_utils#transformers.Cache)" + "Please use an appropriate `Cache` class (https://huggingface.co/docs/transformers/internal/generation_utils#transformers.Cache)" ) if cache_position is None: diff --git a/src/transformers/models/persimmon/modeling_persimmon.py b/src/transformers/models/persimmon/modeling_persimmon.py index 1e4f56c0674d..e6f3f491d309 100644 --- a/src/transformers/models/persimmon/modeling_persimmon.py +++ b/src/transformers/models/persimmon/modeling_persimmon.py @@ -682,7 +682,7 @@ def forward( past_key_values = DynamicCache.from_legacy_cache(past_key_values) logger.warning_once( "We detected that you are passing `past_key_values` as a tuple and this is deprecated and will be removed in v4.43. " - "Please use an appropriate `Cache` class (https://huggingface.co/docs/transformers/v4.41.3/en/internal/generation_utils#transformers.Cache)" + "Please use an appropriate `Cache` class (https://huggingface.co/docs/transformers/internal/generation_utils#transformers.Cache)" ) if inputs_embeds is None: diff --git a/src/transformers/models/phi/modeling_phi.py b/src/transformers/models/phi/modeling_phi.py index 6d63c0ea7e8e..f53ae35ca4ce 100644 --- a/src/transformers/models/phi/modeling_phi.py +++ b/src/transformers/models/phi/modeling_phi.py @@ -966,7 +966,7 @@ def forward( past_key_values = DynamicCache.from_legacy_cache(past_key_values) logger.warning_once( "We detected that you are passing `past_key_values` as a tuple and this is deprecated and will be removed in v4.43. 
" - "Please use an appropriate `Cache` class (https://huggingface.co/docs/transformers/v4.41.3/en/internal/generation_utils#transformers.Cache)" + "Please use an appropriate `Cache` class (https://huggingface.co/docs/transformers/internal/generation_utils#transformers.Cache)" ) if inputs_embeds is None: diff --git a/src/transformers/models/phi3/modeling_phi3.py b/src/transformers/models/phi3/modeling_phi3.py index 08417fcabfaa..d6788c5cc350 100644 --- a/src/transformers/models/phi3/modeling_phi3.py +++ b/src/transformers/models/phi3/modeling_phi3.py @@ -1007,7 +1007,7 @@ def forward( past_key_values = DynamicCache.from_legacy_cache(past_key_values) logger.warning_once( "We detected that you are passing `past_key_values` as a tuple and this is deprecated and will be removed in v4.43. " - "Please use an appropriate `Cache` class (https://huggingface.co/docs/transformers/v4.41.3/en/internal/generation_utils#transformers.Cache)" + "Please use an appropriate `Cache` class (https://huggingface.co/docs/transformers/internal/generation_utils#transformers.Cache)" ) if inputs_embeds is None: diff --git a/src/transformers/models/qwen2/modeling_qwen2.py b/src/transformers/models/qwen2/modeling_qwen2.py index 28b414b1901b..59fe54819df1 100644 --- a/src/transformers/models/qwen2/modeling_qwen2.py +++ b/src/transformers/models/qwen2/modeling_qwen2.py @@ -871,7 +871,7 @@ def forward( past_key_values = DynamicCache.from_legacy_cache(past_key_values) logger.warning_once( "We detected that you are passing `past_key_values` as a tuple and this is deprecated and will be removed in v4.43. 
" - "Please use an appropriate `Cache` class (https://huggingface.co/docs/transformers/v4.41.3/en/internal/generation_utils#transformers.Cache)" + "Please use an appropriate `Cache` class (https://huggingface.co/docs/transformers/internal/generation_utils#transformers.Cache)" ) if inputs_embeds is None: diff --git a/src/transformers/models/qwen2_moe/modeling_qwen2_moe.py b/src/transformers/models/qwen2_moe/modeling_qwen2_moe.py index 12ebe26e058d..f1f8ca3ff532 100644 --- a/src/transformers/models/qwen2_moe/modeling_qwen2_moe.py +++ b/src/transformers/models/qwen2_moe/modeling_qwen2_moe.py @@ -1033,7 +1033,7 @@ def forward( past_key_values = DynamicCache.from_legacy_cache(past_key_values) logger.warning_once( "We detected that you are passing `past_key_values` as a tuple and this is deprecated and will be removed in v4.43. " - "Please use an appropriate `Cache` class (https://huggingface.co/docs/transformers/v4.41.3/en/internal/generation_utils#transformers.Cache)" + "Please use an appropriate `Cache` class (https://huggingface.co/docs/transformers/internal/generation_utils#transformers.Cache)" ) if inputs_embeds is None: diff --git a/src/transformers/models/stablelm/modeling_stablelm.py b/src/transformers/models/stablelm/modeling_stablelm.py index 988948a9a827..ae317af37875 100755 --- a/src/transformers/models/stablelm/modeling_stablelm.py +++ b/src/transformers/models/stablelm/modeling_stablelm.py @@ -959,7 +959,7 @@ def forward( past_key_values = DynamicCache.from_legacy_cache(past_key_values) logger.warning_once( "We detected that you are passing `past_key_values` as a tuple and this is deprecated and will be removed in v4.43. 
" - "Please use an appropriate `Cache` class (https://huggingface.co/docs/transformers/v4.41.3/en/internal/generation_utils#transformers.Cache)" + "Please use an appropriate `Cache` class (https://huggingface.co/docs/transformers/internal/generation_utils#transformers.Cache)" ) if inputs_embeds is None: diff --git a/src/transformers/models/starcoder2/modeling_starcoder2.py b/src/transformers/models/starcoder2/modeling_starcoder2.py index d51077b04254..21469e9d2223 100644 --- a/src/transformers/models/starcoder2/modeling_starcoder2.py +++ b/src/transformers/models/starcoder2/modeling_starcoder2.py @@ -844,7 +844,7 @@ def forward( past_key_values = DynamicCache.from_legacy_cache(past_key_values) logger.warning_once( "We detected that you are passing `past_key_values` as a tuple and this is deprecated and will be removed in v4.43. " - "Please use an appropriate `Cache` class (https://huggingface.co/docs/transformers/v4.41.3/en/internal/generation_utils#transformers.Cache)" + "Please use an appropriate `Cache` class (https://huggingface.co/docs/transformers/internal/generation_utils#transformers.Cache)" ) if inputs_embeds is None: diff --git a/src/transformers/utils/dummy_pt_objects.py b/src/transformers/utils/dummy_pt_objects.py index 4732ecea8611..18df9ad6193f 100644 --- a/src/transformers/utils/dummy_pt_objects.py +++ b/src/transformers/utils/dummy_pt_objects.py @@ -289,13 +289,6 @@ def __init__(self, *args, **kwargs): requires_backends(self, ["torch"]) -class ForceTokensLogitsProcessor(metaclass=DummyObject): - _backends = ["torch"] - - def __init__(self, *args, **kwargs): - requires_backends(self, ["torch"]) - - class GenerationMixin(metaclass=DummyObject): _backends = ["torch"] diff --git a/tests/generation/test_stopping_criteria.py b/tests/generation/test_stopping_criteria.py index ddf9a1c9379e..a04dac96169e 100644 --- a/tests/generation/test_stopping_criteria.py +++ b/tests/generation/test_stopping_criteria.py @@ -28,7 +28,6 @@ from transformers.generation 
import ( EosTokenCriteria, MaxLengthCriteria, - MaxNewTokensCriteria, MaxTimeCriteria, StoppingCriteriaList, StopStringCriteria, @@ -76,21 +75,6 @@ def test_max_length_criteria(self): input_ids, scores = self._get_tensors(10) self.assertTrue(all(criteria(input_ids, scores))) - def test_max_new_tokens_criteria(self): - criteria = MaxNewTokensCriteria(start_length=5, max_new_tokens=5) - - input_ids, scores = self._get_tensors(5) - self.assertFalse(all(criteria(input_ids, scores))) - - input_ids, scores = self._get_tensors(9) - self.assertFalse(all(criteria(input_ids, scores))) - - input_ids, scores = self._get_tensors(10) - self.assertTrue(all(criteria(input_ids, scores))) - - criteria_list = StoppingCriteriaList([criteria]) - self.assertEqual(criteria_list.max_length, 10) - def test_max_time_criteria(self): input_ids, scores = self._get_tensors(5) From 9450f0d2674df1ad5e03b1d037b5b503c7998f70 Mon Sep 17 00:00:00 2001 From: Joao Gante Date: Tue, 13 Aug 2024 16:36:24 +0000 Subject: [PATCH 02/35] update test --- tests/utils/test_configuration_utils.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/utils/test_configuration_utils.py b/tests/utils/test_configuration_utils.py index 934e11a78797..6451f20c3ef6 100644 --- a/tests/utils/test_configuration_utils.py +++ b/tests/utils/test_configuration_utils.py @@ -315,10 +315,10 @@ def test_repo_versioning_before(self): old_configuration = old_transformers.models.auto.AutoConfig.from_pretrained(repo) self.assertEqual(old_configuration.hidden_size, 768) - def test_saving_config_with_custom_generation_kwargs_raises_warning(self): + def test_saving_config_with_custom_generation_kwargs_raises_exception(self): config = BertConfig(min_length=3) # `min_length = 3` is a non-default generation kwarg with tempfile.TemporaryDirectory() as tmp_dir: - with self.assertLogs("transformers.configuration_utils", level="WARNING") as logs: + with self.assertRaises(ValueError) config.save_pretrained(tmp_dir) 
self.assertEqual(len(logs.output), 1) self.assertIn("min_length", logs.output[0]) From a95b705e3a052cfd0325d1dad224a8db70509757 Mon Sep 17 00:00:00 2001 From: Joao Gante Date: Tue, 13 Aug 2024 16:36:36 +0000 Subject: [PATCH 03/35] update test --- tests/utils/test_configuration_utils.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/tests/utils/test_configuration_utils.py b/tests/utils/test_configuration_utils.py index 6451f20c3ef6..f00f8f04c3f2 100644 --- a/tests/utils/test_configuration_utils.py +++ b/tests/utils/test_configuration_utils.py @@ -318,10 +318,8 @@ def test_repo_versioning_before(self): def test_saving_config_with_custom_generation_kwargs_raises_exception(self): config = BertConfig(min_length=3) # `min_length = 3` is a non-default generation kwarg with tempfile.TemporaryDirectory() as tmp_dir: - with self.assertRaises(ValueError) + with self.assertRaises(ValueError): config.save_pretrained(tmp_dir) - self.assertEqual(len(logs.output), 1) - self.assertIn("min_length", logs.output[0]) def test_has_non_default_generation_parameters(self): config = BertConfig() From ba5d9856676b8209bfdd0a64c625ec5a775d04c1 Mon Sep 17 00:00:00 2001 From: Joao Gante Date: Tue, 13 Aug 2024 17:13:16 +0000 Subject: [PATCH 04/35] automagically fix the bad configs --- src/transformers/configuration_utils.py | 28 ++++++++++++++++++------- src/transformers/modeling_utils.py | 12 +++++++++++ 2 files changed, 32 insertions(+), 8 deletions(-) diff --git a/src/transformers/configuration_utils.py b/src/transformers/configuration_utils.py index b12d6a43a671..e01017b71e92 100755 --- a/src/transformers/configuration_utils.py +++ b/src/transformers/configuration_utils.py @@ -440,10 +440,7 @@ def save_pretrained(self, save_directory: Union[str, os.PathLike], push_to_hub: if os.path.isfile(save_directory): raise AssertionError(f"Provided path ({save_directory}) should be a directory, not a file") - non_default_generation_parameters = {} - for parameter_name, 
default_value in self._get_generation_defaults().items(): - if hasattr(self, parameter_name) and getattr(self, parameter_name) != default_value: - non_default_generation_parameters[parameter_name] = getattr(self, parameter_name) + non_default_generation_parameters = self._get_non_default_generation_parameters() if len(non_default_generation_parameters) > 0: raise ValueError( "Some non-default generation parameters are set in the model config. These should go into either a) " @@ -1078,14 +1075,29 @@ def _get_generation_defaults() -> Dict[str, Any]: "begin_suppress_tokens": None, } + def _get_non_default_generation_parameters(self) -> Dict[str, Any]: + """ + Gets the non-default generation parameters on the PretrainedConfig instance + """ + non_default_generation_parameters = {} + default_config = self.__class__() + for parameter_name, default_value in self._get_generation_defaults().items(): + if hasattr(self, parameter_name): + # Two cases in which is okay for the model config to hold generation config parameters: + # 1. The parameter is set to a default generation value (from `generate`'s perspective, it's the same + # as if nothing is set) + is_default_generation_value = getattr(self, parameter_name) == default_value + # 2. The parameter is set as default in the model config (BC support for models like BART) + is_default_in_config = getattr(self, parameter_name) == getattr(default_config, parameter_name) + if not (is_default_generation_value or is_default_in_config): + non_default_generation_parameters[parameter_name] = getattr(self, parameter_name) + return non_default_generation_parameters + def _has_non_default_generation_parameters(self) -> bool: """ Whether or not this instance holds non-default generation parameters. 
""" - for parameter_name, default_value in self._get_generation_defaults().items(): - if hasattr(self, parameter_name) and getattr(self, parameter_name) != default_value: - return True - return False + return len(self._get_non_default_generation_parameters()) > 0 def get_configuration_file(configuration_files: List[str]) -> str: diff --git a/src/transformers/modeling_utils.py b/src/transformers/modeling_utils.py index f4d6e4e6ba8e..88c73cec1bc1 100755 --- a/src/transformers/modeling_utils.py +++ b/src/transformers/modeling_utils.py @@ -2571,6 +2571,18 @@ def save_pretrained( # Save the config if is_main_process: if not _hf_peft_config_loaded: + # If the model config has set attributes that should be in the generation config, move them there. + misplaced_generation_parameters = model_to_save.config._get_non_default_generation_parameters() + if self.can_generate() and len(misplaced_generation_parameters) > 0: + logger.warning( + "Moving the following attributes in the config to the generation config: " + f"{misplaced_generation_parameters}" + ) + default_generation_arguments = model_to_save.config._get_generation_defaults() + for param_name, param_value in misplaced_generation_parameters.items(): + setattr(model_to_save.generation_config, param_name, param_value) + setattr(model_to_save.config, param_name, default_generation_arguments[param_name]) + model_to_save.config.save_pretrained(save_directory) if self.can_generate(): model_to_save.generation_config.save_pretrained(save_directory) From 1b6c637c94837b7021b6b7ebfbd6e18a8bb550fc Mon Sep 17 00:00:00 2001 From: Joao Gante Date: Tue, 13 Aug 2024 17:49:40 +0000 Subject: [PATCH 05/35] fix test --- tests/utils/test_modeling_utils.py | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/tests/utils/test_modeling_utils.py b/tests/utils/test_modeling_utils.py index 5dec17f1f477..f90d18b09553 100644 --- a/tests/utils/test_modeling_utils.py +++ b/tests/utils/test_modeling_utils.py @@ -1599,14 
+1599,17 @@ def test_safetensors_torch_from_torch_sharded(self): for p1, p2 in zip(model.parameters(), new_model.parameters()): self.assertTrue(torch.equal(p1, p2)) - def test_modifying_model_config_causes_warning_saving_generation_config(self): + def test_modifying_model_config_gets_moved_to_generation_config(self): + """ + Calling `model.save_pretrained` should move the changes made to `generate` parameterization in the model config + to the generation config. + """ model = AutoModelForCausalLM.from_pretrained("openai-community/gpt2") - model.config.top_k = 1 + model.config.repetition_penalty = 3.0 with tempfile.TemporaryDirectory() as tmp_dir: - with self.assertLogs("transformers.modeling_utils", level="WARNING") as logs: - model.save_pretrained(tmp_dir) - self.assertEqual(len(logs.output), 1) - self.assertIn("Your generation config was originally created from the model config", logs.output[0]) + model.save_pretrained(tmp_dir) + self.assertTrue(model.config.repetition_penalty != 3.0) + self.assertTrue(model.generation_config.repetition_penalty == 3.0) @require_safetensors def test_model_from_pretrained_from_mlx(self): From d68137f1ed9c1d23ecb550701cfd153b5cb7bece Mon Sep 17 00:00:00 2001 From: Joao Gante Date: Wed, 14 Aug 2024 11:08:22 +0000 Subject: [PATCH 06/35] handle the case of composite model configs (which have no defaults) --- =1.4.5 | 0 src/transformers/configuration_utils.py | 13 +++++++++++-- 2 files changed, 11 insertions(+), 2 deletions(-) create mode 100644 =1.4.5 diff --git a/=1.4.5 b/=1.4.5 new file mode 100644 index 000000000000..e69de29bb2d1 diff --git a/src/transformers/configuration_utils.py b/src/transformers/configuration_utils.py index e01017b71e92..ac51ab5acc2c 100755 --- a/src/transformers/configuration_utils.py +++ b/src/transformers/configuration_utils.py @@ -1080,7 +1080,13 @@ def _get_non_default_generation_parameters(self) -> Dict[str, Any]: Gets the non-default generation parameters on the PretrainedConfig instance """ 
non_default_generation_parameters = {} - default_config = self.__class__() + + # Some composite models don't have a default config + try: + default_config = self.__class__() + except ValueError: + default_config = None + for parameter_name, default_value in self._get_generation_defaults().items(): if hasattr(self, parameter_name): # Two cases in which is okay for the model config to hold generation config parameters: @@ -1088,7 +1094,10 @@ def _get_non_default_generation_parameters(self) -> Dict[str, Any]: # as if nothing is set) is_default_generation_value = getattr(self, parameter_name) == default_value # 2. The parameter is set as default in the model config (BC support for models like BART) - is_default_in_config = getattr(self, parameter_name) == getattr(default_config, parameter_name) + if default_config is not None: + is_default_in_config = getattr(self, parameter_name) == getattr(default_config, parameter_name) + else: + is_default_in_config = False if not (is_default_generation_value or is_default_in_config): non_default_generation_parameters[parameter_name] = getattr(self, parameter_name) return non_default_generation_parameters From 600464785d5ec0cb39d8f330e14bce940ad50e18 Mon Sep 17 00:00:00 2001 From: Joao Gante Date: Wed, 14 Aug 2024 11:08:44 +0000 Subject: [PATCH 07/35] nit --- =1.4.5 | 0 1 file changed, 0 insertions(+), 0 deletions(-) delete mode 100644 =1.4.5 diff --git a/=1.4.5 b/=1.4.5 deleted file mode 100644 index e69de29bb2d1..000000000000 From 40c7a5e6310fd4cd458a4a2772c21792362d9470 Mon Sep 17 00:00:00 2001 From: Joao Gante Date: Wed, 14 Aug 2024 11:10:16 +0000 Subject: [PATCH 08/35] boolean should be True --- src/transformers/configuration_utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/transformers/configuration_utils.py b/src/transformers/configuration_utils.py index ac51ab5acc2c..40b3d68684d1 100755 --- a/src/transformers/configuration_utils.py +++ b/src/transformers/configuration_utils.py @@ -1097,7 
+1097,7 @@ def _get_non_default_generation_parameters(self) -> Dict[str, Any]: if default_config is not None: is_default_in_config = getattr(self, parameter_name) == getattr(default_config, parameter_name) else: - is_default_in_config = False + is_default_in_config = True if not (is_default_generation_value or is_default_in_config): non_default_generation_parameters[parameter_name] = getattr(self, parameter_name) return non_default_generation_parameters From ed761f50f919665abf6efef71ed144dfd0bbb556 Mon Sep 17 00:00:00 2001 From: Joao Gante Date: Wed, 14 Aug 2024 11:12:10 +0000 Subject: [PATCH 09/35] commenting logic --- src/transformers/configuration_utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/transformers/configuration_utils.py b/src/transformers/configuration_utils.py index 40b3d68684d1..a06cc72dbe76 100755 --- a/src/transformers/configuration_utils.py +++ b/src/transformers/configuration_utils.py @@ -1097,7 +1097,7 @@ def _get_non_default_generation_parameters(self) -> Dict[str, Any]: if default_config is not None: is_default_in_config = getattr(self, parameter_name) == getattr(default_config, parameter_name) else: - is_default_in_config = True + is_default_in_config = False # we can't confirm that it is okay if not (is_default_generation_value or is_default_in_config): non_default_generation_parameters[parameter_name] = getattr(self, parameter_name) return non_default_generation_parameters From 20c0f16d7c9d955655b83237dd371245c97eb317 Mon Sep 17 00:00:00 2001 From: Joao Gante Date: Wed, 14 Aug 2024 11:21:28 +0000 Subject: [PATCH 10/35] improve logic (default config should be the first thing we check) --- src/transformers/configuration_utils.py | 22 +++++++++++++--------- src/transformers/modeling_utils.py | 2 +- 2 files changed, 14 insertions(+), 10 deletions(-) diff --git a/src/transformers/configuration_utils.py b/src/transformers/configuration_utils.py index a06cc72dbe76..f6c8a63b56c9 100755 --- 
a/src/transformers/configuration_utils.py +++ b/src/transformers/configuration_utils.py @@ -287,7 +287,7 @@ def __init__(self, **kwargs): # Retrocompatibility: Parameters for sequence generation. While we will keep the ability to load these # parameters, saving them will be deprecated. In a distant future, we won't need to load them. - for parameter_name, default_value in self._get_generation_defaults().items(): + for parameter_name, default_value in self._get_global_generation_defaults().items(): setattr(self, parameter_name, kwargs.pop(parameter_name, default_value)) # Fine-tuning task arguments @@ -1046,7 +1046,7 @@ def register_for_auto_class(cls, auto_class="AutoConfig"): cls._auto_class = auto_class @staticmethod - def _get_generation_defaults() -> Dict[str, Any]: + def _get_global_generation_defaults() -> Dict[str, Any]: return { "max_length": 20, "min_length": 0, @@ -1087,19 +1087,23 @@ def _get_non_default_generation_parameters(self) -> Dict[str, Any]: except ValueError: default_config = None - for parameter_name, default_value in self._get_generation_defaults().items(): + for parameter_name, default_global_value in self._get_global_generation_defaults().items(): if hasattr(self, parameter_name): + is_default_in_config = is_default_generation_value = None # Two cases in which is okay for the model config to hold generation config parameters: - # 1. The parameter is set to a default generation value (from `generate`'s perspective, it's the same - # as if nothing is set) - is_default_generation_value = getattr(self, parameter_name) == default_value - # 2. The parameter is set as default in the model config (BC support for models like BART) + # 1. If we have a default config, then the instance should hold the same generation defaults if default_config is not None: is_default_in_config = getattr(self, parameter_name) == getattr(default_config, parameter_name) + # 2. 
if we don't have a default config, then the instance should hold the global generation defaults else: - is_default_in_config = False # we can't confirm that it is okay - if not (is_default_generation_value or is_default_in_config): + is_default_generation_value = getattr(self, parameter_name) == default_global_value + + is_non_default = (is_default_in_config is False) or ( + is_default_in_config is None and is_default_generation_value is False + ) + if is_non_default: non_default_generation_parameters[parameter_name] = getattr(self, parameter_name) + return non_default_generation_parameters def _has_non_default_generation_parameters(self) -> bool: diff --git a/src/transformers/modeling_utils.py b/src/transformers/modeling_utils.py index 88c73cec1bc1..d8d706a97c1f 100755 --- a/src/transformers/modeling_utils.py +++ b/src/transformers/modeling_utils.py @@ -2578,7 +2578,7 @@ def save_pretrained( "Moving the following attributes in the config to the generation config: " f"{misplaced_generation_parameters}" ) - default_generation_arguments = model_to_save.config._get_generation_defaults() + default_generation_arguments = model_to_save.config._get_global_generation_defaults() for param_name, param_value in misplaced_generation_parameters.items(): setattr(model_to_save.generation_config, param_name, param_value) setattr(model_to_save.config, param_name, default_generation_arguments[param_name]) From 524a1b2602339f30197a9059ebe2693abdf7b9a4 Mon Sep 17 00:00:00 2001 From: Joao Gante Date: Wed, 14 Aug 2024 11:47:53 +0000 Subject: [PATCH 11/35] whisper - remove custom generative config attribute --- tests/models/whisper/test_modeling_whisper.py | 9 --------- 1 file changed, 9 deletions(-) diff --git a/tests/models/whisper/test_modeling_whisper.py b/tests/models/whisper/test_modeling_whisper.py index 6deebf552b91..f3d191b4d3c4 100644 --- a/tests/models/whisper/test_modeling_whisper.py +++ b/tests/models/whisper/test_modeling_whisper.py @@ -218,7 +218,6 @@ def __init__( 
decoder_start_token_id=85, num_conv_layers=1, suppress_tokens=None, - begin_suppress_tokens=None, ): self.parent = parent self.batch_size = batch_size @@ -243,7 +242,6 @@ def __init__( self.decoder_start_token_id = decoder_start_token_id self.num_conv_layers = num_conv_layers self.suppress_tokens = suppress_tokens - self.begin_suppress_tokens = begin_suppress_tokens def prepare_config_and_inputs(self): input_features = floats_tensor([self.batch_size, self.num_mel_bins, self.seq_length], self.vocab_size) @@ -280,7 +278,6 @@ def get_config(self): encoder_ffn_dim=self.hidden_size, decoder_start_token_id=self.decoder_start_token_id, suppress_tokens=self.suppress_tokens, - begin_suppress_tokens=self.begin_suppress_tokens, ) def prepare_config_and_inputs_for_common(self): @@ -3309,7 +3306,6 @@ def __init__( num_mel_bins=80, num_conv_layers=1, suppress_tokens=None, - begin_suppress_tokens=None, classifier_proj_size=4, num_labels=2, is_encoder_decoder=False, @@ -3332,7 +3328,6 @@ def __init__( self.max_source_positions = max_source_positions self.num_conv_layers = num_conv_layers self.suppress_tokens = suppress_tokens - self.begin_suppress_tokens = begin_suppress_tokens self.classifier_proj_size = classifier_proj_size self.num_labels = num_labels self.is_encoder_decoder = is_encoder_decoder @@ -3353,7 +3348,6 @@ def get_config(self): decoder_ffn_dim=self.hidden_size, encoder_ffn_dim=self.hidden_size, suppress_tokens=self.suppress_tokens, - begin_suppress_tokens=self.begin_suppress_tokens, classifier_proj_size=self.classifier_proj_size, num_labels=self.num_labels, is_encoder_decoder=self.is_encoder_decoder, @@ -3685,7 +3679,6 @@ def __init__( decoder_start_token_id=85, num_conv_layers=1, suppress_tokens=None, - begin_suppress_tokens=None, ): self.parent = parent self.batch_size = batch_size @@ -3709,7 +3702,6 @@ def __init__( self.decoder_start_token_id = decoder_start_token_id self.num_conv_layers = num_conv_layers self.suppress_tokens = suppress_tokens - 
self.begin_suppress_tokens = begin_suppress_tokens def prepare_config_and_inputs(self): input_features = floats_tensor([self.batch_size, self.num_mel_bins, self.seq_length], self.vocab_size) @@ -3765,7 +3757,6 @@ def get_config(self): encoder_ffn_dim=self.hidden_size, decoder_start_token_id=self.decoder_start_token_id, suppress_tokens=self.suppress_tokens, - begin_suppress_tokens=self.begin_suppress_tokens, ) def prepare_config_and_inputs_for_common(self): From ec162c9475021dd68ec690c4f2eab4e719c7f5a6 Mon Sep 17 00:00:00 2001 From: Joao Gante Date: Wed, 14 Aug 2024 11:56:38 +0000 Subject: [PATCH 12/35] tf/flax whisper -- remove unwanted custom config --- tests/models/whisper/test_modeling_flax_whisper.py | 6 ------ tests/models/whisper/test_modeling_tf_whisper.py | 3 --- 2 files changed, 9 deletions(-) diff --git a/tests/models/whisper/test_modeling_flax_whisper.py b/tests/models/whisper/test_modeling_flax_whisper.py index 4b8092e800ad..065c6536481d 100644 --- a/tests/models/whisper/test_modeling_flax_whisper.py +++ b/tests/models/whisper/test_modeling_flax_whisper.py @@ -84,7 +84,6 @@ def __init__( decoder_start_token_id=85, num_conv_layers=1, suppress_tokens=None, - begin_suppress_tokens=None, ): self.parent = parent self.batch_size = batch_size @@ -118,7 +117,6 @@ def __init__( self.decoder_start_token_id = decoder_start_token_id self.num_conv_layers = num_conv_layers self.suppress_tokens = suppress_tokens - self.begin_suppress_tokens = begin_suppress_tokens def prepare_config_and_inputs_for_common(self): input_features = floats_tensor([self.batch_size, self.num_mel_bins, self.seq_length], self.vocab_size) @@ -147,7 +145,6 @@ def prepare_config_and_inputs_for_common(self): encoder_ffn_dim=self.encoder_ffn_dim, encoder_layers=self.encoder_layers, suppress_tokens=self.suppress_tokens, - begin_suppress_tokens=self.begin_suppress_tokens, ) inputs_dict = prepare_whisper_inputs_dict(config, input_features, decoder_input_ids) return config, inputs_dict @@ -741,7 +738,6 
@@ def __init__( num_mel_bins=80, num_conv_layers=1, suppress_tokens=None, - begin_suppress_tokens=None, classifier_proj_size=4, num_labels=2, is_encoder_decoder=False, @@ -764,7 +760,6 @@ def __init__( self.max_source_positions = max_source_positions self.num_conv_layers = num_conv_layers self.suppress_tokens = suppress_tokens - self.begin_suppress_tokens = begin_suppress_tokens self.classifier_proj_size = classifier_proj_size self.num_labels = num_labels self.is_encoder_decoder = is_encoder_decoder @@ -785,7 +780,6 @@ def get_config(self): decoder_ffn_dim=self.hidden_size, encoder_ffn_dim=self.hidden_size, suppress_tokens=self.suppress_tokens, - begin_suppress_tokens=self.begin_suppress_tokens, classifier_proj_size=self.classifier_proj_size, num_labels=self.num_labels, is_encoder_decoder=self.is_encoder_decoder, diff --git a/tests/models/whisper/test_modeling_tf_whisper.py b/tests/models/whisper/test_modeling_tf_whisper.py index b200671e048f..be311486267d 100644 --- a/tests/models/whisper/test_modeling_tf_whisper.py +++ b/tests/models/whisper/test_modeling_tf_whisper.py @@ -104,7 +104,6 @@ def __init__( decoder_start_token_id=85, num_conv_layers=1, suppress_tokens=None, - begin_suppress_tokens=None, ): self.parent = parent self.batch_size = batch_size @@ -129,7 +128,6 @@ def __init__( self.decoder_start_token_id = decoder_start_token_id self.num_conv_layers = num_conv_layers self.suppress_tokens = suppress_tokens - self.begin_suppress_tokens = begin_suppress_tokens def prepare_config_and_inputs(self): input_features = floats_tensor([self.batch_size, self.num_mel_bins, self.seq_length], self.vocab_size) @@ -166,7 +164,6 @@ def get_config(self): encoder_ffn_dim=self.hidden_size, decoder_start_token_id=self.decoder_start_token_id, suppress_tokens=self.suppress_tokens, - begin_suppress_tokens=self.begin_suppress_tokens, ) def prepare_config_and_inputs_for_common(self): From 8e15beaaa1d120890fd715b361b5e9d07c3a1340 Mon Sep 17 00:00:00 2001 From: Joao Gante Date: Wed, 
14 Aug 2024 12:20:31 +0000 Subject: [PATCH 13/35] remove custom bart config --- tests/models/bart/test_modeling_bart.py | 8 -------- tests/models/blenderbot/test_modeling_blenderbot.py | 8 -------- .../blenderbot_small/test_modeling_blenderbot_small.py | 8 -------- tests/models/marian/test_modeling_marian.py | 8 -------- tests/models/mbart/test_modeling_mbart.py | 8 -------- tests/models/pegasus/test_modeling_pegasus.py | 8 -------- 6 files changed, 48 deletions(-) diff --git a/tests/models/bart/test_modeling_bart.py b/tests/models/bart/test_modeling_bart.py index dd0cb5bf4c0b..eda51d21199f 100644 --- a/tests/models/bart/test_modeling_bart.py +++ b/tests/models/bart/test_modeling_bart.py @@ -123,12 +123,6 @@ def __init__( self.pad_token_id = pad_token_id self.bos_token_id = bos_token_id - # forcing a certain token to be generated, sets all other tokens to -inf - # if however the token to be generated is already at -inf then it can lead token - # `nan` values and thus break generation - self.forced_bos_token_id = None - self.forced_eos_token_id = None - def prepare_config_and_inputs(self): input_ids = ids_tensor([self.batch_size, self.seq_length], self.vocab_size) input_ids = ids_tensor([self.batch_size, self.seq_length], self.vocab_size).clamp( @@ -158,8 +152,6 @@ def get_config(self): eos_token_id=self.eos_token_id, bos_token_id=self.bos_token_id, pad_token_id=self.pad_token_id, - forced_bos_token_id=self.forced_bos_token_id, - forced_eos_token_id=self.forced_eos_token_id, ) def get_pipeline_config(self): diff --git a/tests/models/blenderbot/test_modeling_blenderbot.py b/tests/models/blenderbot/test_modeling_blenderbot.py index fa0797cbeed8..c7e8be3819f2 100644 --- a/tests/models/blenderbot/test_modeling_blenderbot.py +++ b/tests/models/blenderbot/test_modeling_blenderbot.py @@ -116,12 +116,6 @@ def __init__( self.pad_token_id = pad_token_id self.bos_token_id = bos_token_id - # forcing a certain token to be generated, sets all other tokens to -inf - # if however 
the token to be generated is already at -inf then it can lead token - # `nan` values and thus break generation - self.forced_bos_token_id = None - self.forced_eos_token_id = None - def prepare_config_and_inputs(self): input_ids = ids_tensor([self.batch_size, self.seq_length], self.vocab_size).clamp( 3, @@ -150,8 +144,6 @@ def get_config(self): eos_token_id=self.eos_token_id, bos_token_id=self.bos_token_id, pad_token_id=self.pad_token_id, - forced_bos_token_id=self.forced_bos_token_id, - forced_eos_token_id=self.forced_eos_token_id, ) def get_pipeline_config(self): diff --git a/tests/models/blenderbot_small/test_modeling_blenderbot_small.py b/tests/models/blenderbot_small/test_modeling_blenderbot_small.py index 6be86a66b98e..59f68b547547 100644 --- a/tests/models/blenderbot_small/test_modeling_blenderbot_small.py +++ b/tests/models/blenderbot_small/test_modeling_blenderbot_small.py @@ -113,12 +113,6 @@ def __init__( self.pad_token_id = pad_token_id self.bos_token_id = bos_token_id - # forcing a certain token to be generated, sets all other tokens to -inf - # if however the token to be generated is already at -inf then it can lead token - # `nan` values and thus break generation - self.forced_bos_token_id = None - self.forced_eos_token_id = None - def prepare_config_and_inputs(self): input_ids = ids_tensor([self.batch_size, self.seq_length], self.vocab_size).clamp( 3, @@ -147,8 +141,6 @@ def get_config(self): eos_token_id=self.eos_token_id, bos_token_id=self.bos_token_id, pad_token_id=self.pad_token_id, - forced_bos_token_id=self.forced_bos_token_id, - forced_eos_token_id=self.forced_eos_token_id, ) def prepare_config_and_inputs_for_common(self): diff --git a/tests/models/marian/test_modeling_marian.py b/tests/models/marian/test_modeling_marian.py index 4f4fa36622d1..aed5381fcc70 100644 --- a/tests/models/marian/test_modeling_marian.py +++ b/tests/models/marian/test_modeling_marian.py @@ -132,12 +132,6 @@ def __init__( self.bos_token_id = bos_token_id 
self.decoder_start_token_id = decoder_start_token_id - # forcing a certain token to be generated, sets all other tokens to -inf - # if however the token to be generated is already at -inf then it can lead token - # `nan` values and thus break generation - self.forced_bos_token_id = None - self.forced_eos_token_id = None - def prepare_config_and_inputs(self): input_ids = ids_tensor([self.batch_size, self.seq_length], self.vocab_size).clamp( 3, @@ -167,8 +161,6 @@ def get_config(self): bos_token_id=self.bos_token_id, pad_token_id=self.pad_token_id, decoder_start_token_id=self.decoder_start_token_id, - forced_bos_token_id=self.forced_bos_token_id, - forced_eos_token_id=self.forced_eos_token_id, ) def prepare_config_and_inputs_for_common(self): diff --git a/tests/models/mbart/test_modeling_mbart.py b/tests/models/mbart/test_modeling_mbart.py index 5a8263e11969..9401d892daa3 100644 --- a/tests/models/mbart/test_modeling_mbart.py +++ b/tests/models/mbart/test_modeling_mbart.py @@ -120,12 +120,6 @@ def __init__( self.pad_token_id = pad_token_id self.bos_token_id = bos_token_id - # forcing a certain token to be generated, sets all other tokens to -inf - # if however the token to be generated is already at -inf then it can lead token - # `nan` values and thus break generation - self.forced_bos_token_id = None - self.forced_eos_token_id = None - def prepare_config_and_inputs(self): input_ids = ids_tensor([self.batch_size, self.seq_length], self.vocab_size) input_ids = ids_tensor([self.batch_size, self.seq_length], self.vocab_size).clamp( @@ -155,8 +149,6 @@ def get_config(self): eos_token_id=self.eos_token_id, bos_token_id=self.bos_token_id, pad_token_id=self.pad_token_id, - forced_bos_token_id=self.forced_bos_token_id, - forced_eos_token_id=self.forced_eos_token_id, ) def prepare_config_and_inputs_for_common(self): diff --git a/tests/models/pegasus/test_modeling_pegasus.py b/tests/models/pegasus/test_modeling_pegasus.py index f7de1258847d..2bd102b904e3 100644 --- 
a/tests/models/pegasus/test_modeling_pegasus.py +++ b/tests/models/pegasus/test_modeling_pegasus.py @@ -112,12 +112,6 @@ def __init__( self.pad_token_id = pad_token_id self.bos_token_id = bos_token_id - # forcing a certain token to be generated, sets all other tokens to -inf - # if however the token to be generated is already at -inf then it can lead token - # `nan` values and thus break generation - self.forced_bos_token_id = None - self.forced_eos_token_id = None - def prepare_config_and_inputs(self): input_ids = ids_tensor([self.batch_size, self.seq_length], self.vocab_size) input_ids = ids_tensor([self.batch_size, self.seq_length], self.vocab_size).clamp( @@ -165,8 +159,6 @@ def get_config(self): eos_token_id=self.eos_token_id, bos_token_id=self.bos_token_id, pad_token_id=self.pad_token_id, - forced_bos_token_id=self.forced_bos_token_id, - forced_eos_token_id=self.forced_eos_token_id, ) def prepare_config_and_inputs_for_common(self): From 0f10ac9630a56ae7b66b42b598ee075ec5ce8063 Mon Sep 17 00:00:00 2001 From: Joao Gante Date: Wed, 14 Aug 2024 12:31:18 +0000 Subject: [PATCH 14/35] raise ValueError instead of assert --- .../encoder_decoder/configuration_encoder_decoder.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/src/transformers/models/encoder_decoder/configuration_encoder_decoder.py b/src/transformers/models/encoder_decoder/configuration_encoder_decoder.py index 8c0ae2771e81..62a71cd72b15 100644 --- a/src/transformers/models/encoder_decoder/configuration_encoder_decoder.py +++ b/src/transformers/models/encoder_decoder/configuration_encoder_decoder.py @@ -74,9 +74,11 @@ class EncoderDecoderConfig(PretrainedConfig): def __init__(self, **kwargs): super().__init__(**kwargs) - assert ( - "encoder" in kwargs and "decoder" in kwargs - ), "Config has to be initialized with encoder and decoder config" + if "encoder" not in kwargs or "decoder" not in kwargs: + raise ValueError( + f"A configuraton of type {self.model_type} cannot be 
instantiated because " + f"not both `encoder` and `decoder` sub-configurations are passed, but only {kwargs}" + ) encoder_config = kwargs.pop("encoder") encoder_model_type = encoder_config.pop("model_type") decoder_config = kwargs.pop("decoder") From 157aae9e859ce0c39c397a34e3e9d487aec51523 Mon Sep 17 00:00:00 2001 From: Joao Gante Date: Wed, 14 Aug 2024 12:39:11 +0000 Subject: [PATCH 15/35] remove test for deleted feature --- tests/utils/test_cli.py | 14 +------------- 1 file changed, 1 insertion(+), 13 deletions(-) diff --git a/tests/utils/test_cli.py b/tests/utils/test_cli.py index b208ff19f1a4..148f091c2794 100644 --- a/tests/utils/test_cli.py +++ b/tests/utils/test_cli.py @@ -18,7 +18,7 @@ import unittest from unittest.mock import patch -from transformers.testing_utils import CaptureStd, is_pt_tf_cross_test, require_torch +from transformers.testing_utils import CaptureStd, require_torch class CLITest(unittest.TestCase): @@ -33,18 +33,6 @@ def test_cli_env(self): self.assertIn("Platform", cs.out) self.assertIn("Using distributed or parallel set-up in script?", cs.out) - @is_pt_tf_cross_test - @patch( - "sys.argv", ["fakeprogrampath", "pt-to-tf", "--model-name", "hf-internal-testing/tiny-random-gptj", "--no-pr"] - ) - def test_cli_pt_to_tf(self): - import transformers.commands.transformers_cli - - shutil.rmtree("/tmp/hf-internal-testing/tiny-random-gptj", ignore_errors=True) # cleans potential past runs - transformers.commands.transformers_cli.main() - - self.assertTrue(os.path.exists("/tmp/hf-internal-testing/tiny-random-gptj/tf_model.h5")) - @require_torch @patch("sys.argv", ["fakeprogrampath", "download", "hf-internal-testing/tiny-random-gptj", "--cache-dir", "/tmp"]) def test_cli_download(self): From 47ec56ad91154090faad15f75ae9133566bd4d74 Mon Sep 17 00:00:00 2001 From: Joao Gante Date: Wed, 14 Aug 2024 14:23:30 +0000 Subject: [PATCH 16/35] default config preference --- src/transformers/configuration_utils.py | 6 ------ src/transformers/generation/utils.py 
| 9 ++++----- tests/utils/test_configuration_utils.py | 8 ++++---- 3 files changed, 8 insertions(+), 15 deletions(-) diff --git a/src/transformers/configuration_utils.py b/src/transformers/configuration_utils.py index f6c8a63b56c9..f354f0ae4196 100755 --- a/src/transformers/configuration_utils.py +++ b/src/transformers/configuration_utils.py @@ -1106,12 +1106,6 @@ def _get_non_default_generation_parameters(self) -> Dict[str, Any]: return non_default_generation_parameters - def _has_non_default_generation_parameters(self) -> bool: - """ - Whether or not this instance holds non-default generation parameters. - """ - return len(self._get_non_default_generation_parameters()) > 0 - def get_configuration_file(configuration_files: List[str]) -> str: """ diff --git a/src/transformers/generation/utils.py b/src/transformers/generation/utils.py index 5e93a115acd5..409a756103df 100644 --- a/src/transformers/generation/utils.py +++ b/src/transformers/generation/utils.py @@ -1350,19 +1350,18 @@ def _prepare_generation_config( using_model_generation_config = False if generation_config is None: # legacy: users may modify the model configuration to control generation. To trigger this legacy behavior, - # three conditions must be met + # the following conditions must be met # 1) the generation config must have been created from the model config (`_from_model_config` field); # 2) the generation config must have seen no modification since its creation (the hash is the same); # 3) the user must have set generation parameters in the model config. # NOTE: `torch.compile` can't compile `hash`, this legacy support is disabled with compilation. 
if ( not is_torchdynamo_compiling() - and self.generation_config._from_model_config - and self.generation_config._original_object_hash == hash(self.generation_config) - and self.config._has_non_default_generation_parameters() + and self.generation_config._from_model_config # 1) + and self.generation_config._original_object_hash == hash(self.generation_config) # 2) ): new_generation_config = GenerationConfig.from_model_config(self.config) - if new_generation_config != self.generation_config: + if new_generation_config != self.generation_config: # 3) warnings.warn( "You have modified the pretrained model configuration to control generation. This is a" " deprecated strategy to control generation and will be removed soon, in a future version." diff --git a/tests/utils/test_configuration_utils.py b/tests/utils/test_configuration_utils.py index f00f8f04c3f2..6b684867eb87 100644 --- a/tests/utils/test_configuration_utils.py +++ b/tests/utils/test_configuration_utils.py @@ -321,13 +321,13 @@ def test_saving_config_with_custom_generation_kwargs_raises_exception(self): with self.assertRaises(ValueError): config.save_pretrained(tmp_dir) - def test_has_non_default_generation_parameters(self): + def test_get_non_default_generation_parameters(self): config = BertConfig() - self.assertFalse(config._has_non_default_generation_parameters()) + self.assertFalse(len(config._get_non_default_generation_parameters()) > 0) config = BertConfig(min_length=3) - self.assertTrue(config._has_non_default_generation_parameters()) + self.assertTrue(len(config._get_non_default_generation_parameters()) > 0) config = BertConfig(min_length=0) # `min_length = 0` is a default generation kwarg - self.assertFalse(config._has_non_default_generation_parameters()) + self.assertFalse(len(config._get_non_default_generation_parameters()) > 0) def test_loading_config_do_not_raise_future_warnings(self): """Regression test for https://github.com/huggingface/transformers/issues/31002.""" From 
e05590050d26b5d60e1764d2a84966673a8ba199 Mon Sep 17 00:00:00 2001 From: Joao Gante Date: Wed, 14 Aug 2024 14:43:08 +0000 Subject: [PATCH 17/35] yup, that should raise a warning --- tests/generation/test_utils.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/tests/generation/test_utils.py b/tests/generation/test_utils.py index 72da44115f5c..59d5036b8c8f 100644 --- a/tests/generation/test_utils.py +++ b/tests/generation/test_utils.py @@ -2778,7 +2778,7 @@ def forward(self, input_ids, **kwargs): def test_default_max_length_warning(self): model = AutoModelForCausalLM.from_pretrained("hf-internal-testing/tiny-random-gpt2").to(torch_device) tokenizer = AutoTokenizer.from_pretrained("hf-internal-testing/tiny-random-gpt2") - model.config.pad_token_id = tokenizer.eos_token_id + model.generation_config.pad_token_id = tokenizer.eos_token_id text = "Hello world" tokenized_inputs = tokenizer([text], return_tensors="pt") @@ -2805,8 +2805,8 @@ def test_length_warning_assisted_generation(self): model = AutoModelForCausalLM.from_pretrained("hf-internal-testing/tiny-random-gpt2").to(torch_device) assistant = AutoModelForCausalLM.from_pretrained("hf-internal-testing/tiny-random-gpt2").to(torch_device) tokenizer = AutoTokenizer.from_pretrained("hf-internal-testing/tiny-random-gpt2") - model.config.pad_token_id = tokenizer.eos_token_id - assistant.config.pad_token_id = tokenizer.eos_token_id + model.generation_config.pad_token_id = tokenizer.eos_token_id + assistant.generation_config.pad_token_id = tokenizer.eos_token_id text = "Hello world" tokenized_inputs = tokenizer([text], return_tensors="pt") @@ -2827,8 +2827,8 @@ def test_generated_length_assisted_generation(self): model = AutoModelForCausalLM.from_pretrained("hf-internal-testing/tiny-random-gpt2").to(torch_device) assistant = AutoModelForCausalLM.from_pretrained("hf-internal-testing/tiny-random-gpt2").to(torch_device) tokenizer = 
AutoTokenizer.from_pretrained("hf-internal-testing/tiny-random-gpt2") - model.config.pad_token_id = tokenizer.eos_token_id - assistant.config.pad_token_id = tokenizer.eos_token_id + model.generation_config.pad_token_id = tokenizer.eos_token_id + assistant.generation_config.pad_token_id = tokenizer.eos_token_id text = "Hello world" tokenized_inputs = tokenizer([text], return_tensors="pt") @@ -2854,7 +2854,7 @@ def test_model_kwarg_assisted_decoding_decoder_only(self): # PT-only test: TF doesn't support assisted decoding yet. model = AutoModelForCausalLM.from_pretrained("hf-internal-testing/tiny-random-gpt2").to(torch_device) tokenizer = AutoTokenizer.from_pretrained("hf-internal-testing/tiny-random-gpt2") - model.config.pad_token_id = tokenizer.eos_token_id + model.generation_config.pad_token_id = tokenizer.eos_token_id text = "Hello world" tokenized_inputs = tokenizer([text], return_tensors="pt") From 4473394a61daefc8cf7530ee74376bf98d90c805 Mon Sep 17 00:00:00 2001 From: Joao Gante Date: Wed, 14 Aug 2024 14:58:39 +0000 Subject: [PATCH 18/35] another model tester with generation params --- .../decision_transformer/test_modeling_decision_transformer.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/tests/models/decision_transformer/test_modeling_decision_transformer.py b/tests/models/decision_transformer/test_modeling_decision_transformer.py index 27d1598167e6..0c95e6291c50 100644 --- a/tests/models/decision_transformer/test_modeling_decision_transformer.py +++ b/tests/models/decision_transformer/test_modeling_decision_transformer.py @@ -41,7 +41,6 @@ def __init__( act_dim=6, state_dim=17, hidden_size=23, - max_length=11, is_training=True, ): self.parent = parent @@ -50,7 +49,6 @@ def __init__( self.act_dim = act_dim self.state_dim = state_dim self.hidden_size = hidden_size - self.max_length = max_length self.is_training = is_training def prepare_config_and_inputs(self): @@ -80,7 +78,6 @@ def get_config(self): act_dim=self.act_dim, state_dim=self.state_dim, 
hidden_size=self.hidden_size, - max_length=self.max_length, ) def create_and_check_model( From ccf956b09272fefd4b790d4c303639df8128ea25 Mon Sep 17 00:00:00 2001 From: Joao Gante Date: Wed, 14 Aug 2024 15:50:59 +0000 Subject: [PATCH 19/35] another model --- tests/models/blenderbot/test_modeling_blenderbot.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/tests/models/blenderbot/test_modeling_blenderbot.py b/tests/models/blenderbot/test_modeling_blenderbot.py index c7e8be3819f2..cecedb8a9071 100644 --- a/tests/models/blenderbot/test_modeling_blenderbot.py +++ b/tests/models/blenderbot/test_modeling_blenderbot.py @@ -360,7 +360,6 @@ def __init__( decoder_attention_heads=4, max_position_embeddings=30, is_encoder_decoder=False, - encoder_no_repeat_ngram_size=0, pad_token_id=0, bos_token_id=1, eos_token_id=2, @@ -391,7 +390,6 @@ def __init__( self.use_cache = use_cache self.max_position_embeddings = max_position_embeddings self.is_encoder_decoder = is_encoder_decoder - self.encoder_no_repeat_ngram_size = encoder_no_repeat_ngram_size self.scope = None self.decoder_key_length = decoder_seq_length @@ -423,7 +421,6 @@ def prepare_config_and_inputs(self): decoder_start_token_id=self.decoder_start_token_id, max_position_embeddings=self.max_position_embeddings, is_encoder_decoder=self.is_encoder_decoder, - encoder_no_repeat_ngram_size=self.encoder_no_repeat_ngram_size, ) return ( From 0a51dad65a89274b63203b883faf3f3b651e1636 Mon Sep 17 00:00:00 2001 From: Joao Gante Date: Wed, 14 Aug 2024 16:02:59 +0000 Subject: [PATCH 20/35] single worker for tf examples job --- .circleci/create_circleci_config.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/.circleci/create_circleci_config.py b/.circleci/create_circleci_config.py index d783488caecc..2e0af429a207 100644 --- a/.circleci/create_circleci_config.py +++ b/.circleci/create_circleci_config.py @@ -327,7 +327,8 @@ def job_name(self): cache_name="tensorflow_examples", 
docker_image=[{"image":"huggingface/transformers-examples-tf"}], install_steps=["uv venv && uv pip install . && uv pip install -r examples/tensorflow/_tests_requirements.txt"], - parallelism=8 + parallelism=8, + pytest_num_workers=1 ) From 8619937171208aa8264b90b21ddec3d33596ef85 Mon Sep 17 00:00:00 2001 From: Joao Gante Date: Wed, 14 Aug 2024 16:18:56 +0000 Subject: [PATCH 21/35] handle defaults for composite models --- src/transformers/configuration_utils.py | 6 +++++- src/transformers/models/rag/configuration_rag.py | 8 +++++--- 2 files changed, 10 insertions(+), 4 deletions(-) diff --git a/src/transformers/configuration_utils.py b/src/transformers/configuration_utils.py index f354f0ae4196..798f7a402335 100755 --- a/src/transformers/configuration_utils.py +++ b/src/transformers/configuration_utils.py @@ -1081,11 +1081,15 @@ def _get_non_default_generation_parameters(self) -> Dict[str, Any]: """ non_default_generation_parameters = {} - # Some composite models don't have a default config + # Some composite models don't have a default config -- try to use their decoder config as a fallback try: default_config = self.__class__() except ValueError: default_config = None + for possible_decoder_attribute_name in ("decoder", "generator"): + if hasattr(self, possible_decoder_attribute_name): + default_config = getattr(self, possible_decoder_attribute_name).__class__() + break for parameter_name, default_global_value in self._get_global_generation_defaults().items(): if hasattr(self, parameter_name): diff --git a/src/transformers/models/rag/configuration_rag.py b/src/transformers/models/rag/configuration_rag.py index 5dd4d12c5e74..d1fab393557b 100644 --- a/src/transformers/models/rag/configuration_rag.py +++ b/src/transformers/models/rag/configuration_rag.py @@ -124,9 +124,11 @@ def __init__( vocab_size=vocab_size, **kwargs, ) - assert ( - "question_encoder" in kwargs and "generator" in kwargs - ), "Config has to be initialized with question_encoder and generator config" 
+ if "question_encoder" not in kwargs or "generator" not in kwargs: + raise ValueError( + f"A configuraton of type {self.model_type} cannot be instantiated because " + f"not both `question_encoder` and `generator` sub-configurations are passed, but only {kwargs}" + ) question_encoder_config = kwargs.pop("question_encoder") question_encoder_model_type = question_encoder_config.pop("model_type") decoder_config = kwargs.pop("generator") From 884013ffa5c9d3f4f08c39740763217298eb7ea6 Mon Sep 17 00:00:00 2001 From: Joao Gante Date: Wed, 14 Aug 2024 17:08:03 +0000 Subject: [PATCH 22/35] update tf whisper logits processors to handle edge case --- .../generation/tf_logits_process.py | 40 ++++++++++++------- 1 file changed, 26 insertions(+), 14 deletions(-) diff --git a/src/transformers/generation/tf_logits_process.py b/src/transformers/generation/tf_logits_process.py index fc9799b7ab39..58824b7b0071 100644 --- a/src/transformers/generation/tf_logits_process.py +++ b/src/transformers/generation/tf_logits_process.py @@ -520,15 +520,21 @@ def __init__(self, begin_suppress_tokens, begin_index): self.begin_index = begin_index def __call__(self, input_ids: tf.Tensor, scores: tf.Tensor, cur_len: int) -> tf.Tensor: - scores = tf.cond( - tf.equal(cur_len, self.begin_index), - lambda: tf.tensor_scatter_nd_update( - scores, - indices=[[i, token] for i in range(scores.shape[0]) for token in self.begin_suppress_tokens], - updates=[-float("inf") for _ in range(scores.shape[0] * len(self.begin_suppress_tokens))], - ), - lambda: scores, - ) + suppressed_indices = [] + for token in self.begin_suppress_tokens: + if token < scores.shape[-1]: # to ensure we don't go beyond the vocab size + suppressed_indices.extend([[i, token] for i in range(scores.shape[0])]) + + if len(suppressed_indices) > 0: + scores = tf.cond( + tf.equal(cur_len, self.begin_index), + lambda: tf.tensor_scatter_nd_update( + scores, + indices=suppressed_indices, + updates=[-float("inf") for _ in range(scores.shape[0] * 
len(self.begin_suppress_tokens))], + ), + lambda: scores, + ) return scores @@ -540,11 +546,17 @@ def __init__(self, suppress_tokens): self.suppress_tokens = list(suppress_tokens) def __call__(self, input_ids: tf.Tensor, scores: tf.Tensor, cur_len: int) -> tf.Tensor: - scores = tf.tensor_scatter_nd_update( - scores, - indices=[[i, token] for i in range(scores.shape[0]) for token in self.suppress_tokens], - updates=[-float("inf") for _ in range(scores.shape[0] * len(self.suppress_tokens))], - ) + suppressed_indices = [] + for token in self.suppress_tokens: + if token < scores.shape[-1]: # to ensure we don't go beyond the vocab size + suppressed_indices.extend([[i, token] for i in range(scores.shape[0])]) + + if len(suppressed_indices) > 0: + scores = tf.tensor_scatter_nd_update( + scores, + indices=[[i, token] for i in range(scores.shape[0]) for token in self.suppress_tokens], + updates=[-float("inf") for _ in range(scores.shape[0] * len(self.suppress_tokens))], + ) return scores From 8ab43048992ccb3a03730eb6e691202ae2d5fc52 Mon Sep 17 00:00:00 2001 From: Joao Gante Date: Wed, 14 Aug 2024 17:27:34 +0000 Subject: [PATCH 23/35] even more precise --- src/transformers/configuration_utils.py | 23 +++++++++++++++-------- 1 file changed, 15 insertions(+), 8 deletions(-) diff --git a/src/transformers/configuration_utils.py b/src/transformers/configuration_utils.py index 798f7a402335..cb4819d6c389 100755 --- a/src/transformers/configuration_utils.py +++ b/src/transformers/configuration_utils.py @@ -1081,32 +1081,39 @@ def _get_non_default_generation_parameters(self) -> Dict[str, Any]: """ non_default_generation_parameters = {} - # Some composite models don't have a default config -- try to use their decoder config as a fallback + # Composite models don't have a default config, use their decoder config as a fallback for default values + # If no known pattern is matched, then `default_config = None` -> check against the global generation defaults try: default_config = 
self.__class__() except ValueError: default_config = None - for possible_decoder_attribute_name in ("decoder", "generator"): - if hasattr(self, possible_decoder_attribute_name): - default_config = getattr(self, possible_decoder_attribute_name).__class__() + decoder_attribute_name = None + for decoder_attribute_name in ("decoder", "generator", "text_config"): + if hasattr(self, decoder_attribute_name): + default_config = getattr(self, decoder_attribute_name).__class__() break + # If it is a composite model, we want to check the subconfig that will be used for generation + self_decoder_config = self if decoder_attribute_name is None else getattr(self, decoder_attribute_name) + for parameter_name, default_global_value in self._get_global_generation_defaults().items(): - if hasattr(self, parameter_name): + if hasattr(self_decoder_config, parameter_name): is_default_in_config = is_default_generation_value = None # Two cases in which is okay for the model config to hold generation config parameters: # 1. If we have a default config, then the instance should hold the same generation defaults if default_config is not None: - is_default_in_config = getattr(self, parameter_name) == getattr(default_config, parameter_name) + is_default_in_config = getattr(self_decoder_config, parameter_name) == getattr( + default_config, parameter_name + ) # 2. 
if we don't have a default config, then the instance should hold the global generation defaults else: - is_default_generation_value = getattr(self, parameter_name) == default_global_value + is_default_generation_value = getattr(self_decoder_config, parameter_name) == default_global_value is_non_default = (is_default_in_config is False) or ( is_default_in_config is None and is_default_generation_value is False ) if is_non_default: - non_default_generation_parameters[parameter_name] = getattr(self, parameter_name) + non_default_generation_parameters[parameter_name] = getattr(self_decoder_config, parameter_name) return non_default_generation_parameters From a649bdf56e284af968671d8be0033edaf9debad7 Mon Sep 17 00:00:00 2001 From: Joao Gante Date: Wed, 14 Aug 2024 17:32:34 +0000 Subject: [PATCH 24/35] move defaults up --- src/transformers/configuration_utils.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/transformers/configuration_utils.py b/src/transformers/configuration_utils.py index cb4819d6c389..9fba1aa1b0a3 100755 --- a/src/transformers/configuration_utils.py +++ b/src/transformers/configuration_utils.py @@ -1080,14 +1080,14 @@ def _get_non_default_generation_parameters(self) -> Dict[str, Any]: Gets the non-default generation parameters on the PretrainedConfig instance """ non_default_generation_parameters = {} + decoder_attribute_name = None + default_config = None # Composite models don't have a default config, use their decoder config as a fallback for default values # If no known pattern is matched, then `default_config = None` -> check against the global generation defaults try: default_config = self.__class__() except ValueError: - default_config = None - decoder_attribute_name = None for decoder_attribute_name in ("decoder", "generator", "text_config"): if hasattr(self, decoder_attribute_name): default_config = getattr(self, decoder_attribute_name).__class__() From 686f428a92b8833969cc836b9528dd59245c333b Mon Sep 17 00:00:00 
2001 From: Joao Gante Date: Thu, 15 Aug 2024 10:08:38 +0000 Subject: [PATCH 25/35] last one? --- .../test_modeling_audio_spectrogram_transformer.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/tests/models/audio_spectrogram_transformer/test_modeling_audio_spectrogram_transformer.py b/tests/models/audio_spectrogram_transformer/test_modeling_audio_spectrogram_transformer.py index 0bbefda7ba50..80a9f50057ed 100644 --- a/tests/models/audio_spectrogram_transformer/test_modeling_audio_spectrogram_transformer.py +++ b/tests/models/audio_spectrogram_transformer/test_modeling_audio_spectrogram_transformer.py @@ -47,7 +47,6 @@ def __init__( parent, batch_size=13, patch_size=2, - max_length=24, num_mel_bins=16, is_training=True, use_labels=True, @@ -68,7 +67,6 @@ def __init__( self.parent = parent self.batch_size = batch_size self.patch_size = patch_size - self.max_length = max_length self.num_mel_bins = num_mel_bins self.is_training = is_training self.use_labels = use_labels @@ -106,7 +104,6 @@ def prepare_config_and_inputs(self): def get_config(self): return ASTConfig( patch_size=self.patch_size, - max_length=self.max_length, num_mel_bins=self.num_mel_bins, hidden_size=self.hidden_size, num_hidden_layers=self.num_hidden_layers, From 655dc2953bc45548cc9c256acbbb70056434687b Mon Sep 17 00:00:00 2001 From: Joao Gante Date: Thu, 15 Aug 2024 10:29:06 +0000 Subject: [PATCH 26/35] handle exception --- src/transformers/configuration_utils.py | 80 +++---------------- ...iguration_audio_spectrogram_transformer.py | 8 ++ ..._modeling_audio_spectrogram_transformer.py | 3 + 3 files changed, 20 insertions(+), 71 deletions(-) diff --git a/src/transformers/configuration_utils.py b/src/transformers/configuration_utils.py index 9fba1aa1b0a3..2fb5b8ff1a45 100755 --- a/src/transformers/configuration_utils.py +++ b/src/transformers/configuration_utils.py @@ -81,6 +81,15 @@ class PretrainedConfig(PushToHubMixin): model. - **num_hidden_layers** (`int`) -- The number of blocks in the model. 
+ + + Setting parameters for sequence generation in the model config is deprecated. For backward compatibility, loading + some of them will still be possible, but attempting to overwrite them will throw an exception -- you should set + them in a [~transformers.GenerationConfig]. Check the documentation of [~transformers.GenerationConfig] for more + information about the individual parameters. + + + Arg: name_or_path (`str`, *optional*, defaults to `""`): Store the string that was passed to [`PreTrainedModel.from_pretrained`] or @@ -117,77 +126,6 @@ class PretrainedConfig(PushToHubMixin): sequence_length embeddings at a time. For more information on feed forward chunking, see [How does Feed Forward Chunking work?](../glossary.html#feed-forward-chunking). - > Parameters for sequence generation - - max_length (`int`, *optional*, defaults to 20): - Maximum length that will be used by default in the `generate` method of the model. - min_length (`int`, *optional*, defaults to 0): - Minimum length that will be used by default in the `generate` method of the model. - do_sample (`bool`, *optional*, defaults to `False`): - Flag that will be used by default in the `generate` method of the model. Whether or not to use sampling ; - use greedy decoding otherwise. - early_stopping (`bool`, *optional*, defaults to `False`): - Flag that will be used by default in the `generate` method of the model. Whether to stop the beam search - when at least `num_beams` sentences are finished per batch or not. - num_beams (`int`, *optional*, defaults to 1): - Number of beams for beam search that will be used by default in the `generate` method of the model. 1 means - no beam search. - num_beam_groups (`int`, *optional*, defaults to 1): - Number of groups to divide `num_beams` into in order to ensure diversity among different groups of beams - that will be used by default in the `generate` method of the model. 1 means no group beam search. 
- diversity_penalty (`float`, *optional*, defaults to 0.0): - Value to control diversity for group beam search. that will be used by default in the `generate` method of - the model. 0 means no diversity penalty. The higher the penalty, the more diverse are the outputs. - temperature (`float`, *optional*, defaults to 1.0): - The value used to module the next token probabilities that will be used by default in the `generate` method - of the model. Must be strictly positive. - top_k (`int`, *optional*, defaults to 50): - Number of highest probability vocabulary tokens to keep for top-k-filtering that will be used by default in - the `generate` method of the model. - top_p (`float`, *optional*, defaults to 1): - Value that will be used by default in the `generate` method of the model for `top_p`. If set to float < 1, - only the most probable tokens with probabilities that add up to `top_p` or higher are kept for generation. - typical_p (`float`, *optional*, defaults to 1): - Local typicality measures how similar the conditional probability of predicting a target token next is to - the expected conditional probability of predicting a random token next, given the partial text already - generated. If set to float < 1, the smallest set of the most locally typical tokens with probabilities that - add up to `typical_p` or higher are kept for generation. See [this - paper](https://arxiv.org/pdf/2202.00666.pdf) for more details. - repetition_penalty (`float`, *optional*, defaults to 1): - Parameter for repetition penalty that will be used by default in the `generate` method of the model. 1.0 - means no penalty. - length_penalty (`float`, *optional*, defaults to 1): - Exponential penalty to the length that is used with beam-based generation. It is applied as an exponent to - the sequence length, which in turn is used to divide the score of the sequence. Since the score is the log - likelihood of the sequence (i.e. 
negative), `length_penalty` > 0.0 promotes longer sequences, while - `length_penalty` < 0.0 encourages shorter sequences. - no_repeat_ngram_size (`int`, *optional*, defaults to 0) -- Value that will be used by default in the - `generate` method of the model for `no_repeat_ngram_size`. If set to int > 0, all ngrams of that size can - only occur once. - encoder_no_repeat_ngram_size (`int`, *optional*, defaults to 0) -- Value that will be used by - default in the `generate` method of the model for `encoder_no_repeat_ngram_size`. If set to int > 0, all - ngrams of that size that occur in the `encoder_input_ids` cannot occur in the `decoder_input_ids`. - bad_words_ids (`List[int]`, *optional*): - List of token ids that are not allowed to be generated that will be used by default in the `generate` - method of the model. In order to get the tokens of the words that should not appear in the generated text, - use `tokenizer.encode(bad_word, add_prefix_space=True)`. - num_return_sequences (`int`, *optional*, defaults to 1): - Number of independently computed returned sequences for each element in the batch that will be used by - default in the `generate` method of the model. - output_scores (`bool`, *optional*, defaults to `False`): - Whether the model should return the logits when used for generation. - return_dict_in_generate (`bool`, *optional*, defaults to `False`): - Whether the model should return a [`~transformers.utils.ModelOutput`] instead of a `torch.LongTensor`. - forced_bos_token_id (`int`, *optional*): - The id of the token to force as the first generated token after the `decoder_start_token_id`. Useful for - multilingual models like [mBART](../model_doc/mbart) where the first generated token needs to be the target - language token. - forced_eos_token_id (`int`, *optional*): - The id of the token to force as the last generated token when `max_length` is reached. 
- remove_invalid_values (`bool`, *optional*): - Whether to remove possible _nan_ and _inf_ outputs of the model to prevent the generation method to crash. - Note that using `remove_invalid_values` can slow down generation. - > Parameters for fine-tuning tasks architectures (`List[str]`, *optional*): diff --git a/src/transformers/models/audio_spectrogram_transformer/configuration_audio_spectrogram_transformer.py b/src/transformers/models/audio_spectrogram_transformer/configuration_audio_spectrogram_transformer.py index 9e1d995dc291..7980667a68d7 100644 --- a/src/transformers/models/audio_spectrogram_transformer/configuration_audio_spectrogram_transformer.py +++ b/src/transformers/models/audio_spectrogram_transformer/configuration_audio_spectrogram_transformer.py @@ -14,6 +14,8 @@ # limitations under the License. """Audio Spectogram Transformer (AST) model configuration""" +from typing import Any, Dict + from ...configuration_utils import PretrainedConfig from ...utils import logging @@ -118,3 +120,9 @@ def __init__( self.time_stride = time_stride self.max_length = max_length self.num_mel_bins = num_mel_bins + + # Overwritten from the parent class: AST is not compatible with `generate`, but has a config parameter sharing the + # same name (`max_length`). Sharing the same name triggers checks regarding the config -> generation_config + # generative parameters deprecation cycle, overwriting this function prevents this from happening. 
+ def _get_non_default_generation_parameters(self) -> Dict[str, Any]: + return {} diff --git a/tests/models/audio_spectrogram_transformer/test_modeling_audio_spectrogram_transformer.py b/tests/models/audio_spectrogram_transformer/test_modeling_audio_spectrogram_transformer.py index 80a9f50057ed..0bbefda7ba50 100644 --- a/tests/models/audio_spectrogram_transformer/test_modeling_audio_spectrogram_transformer.py +++ b/tests/models/audio_spectrogram_transformer/test_modeling_audio_spectrogram_transformer.py @@ -47,6 +47,7 @@ def __init__( parent, batch_size=13, patch_size=2, + max_length=24, num_mel_bins=16, is_training=True, use_labels=True, @@ -67,6 +68,7 @@ def __init__( self.parent = parent self.batch_size = batch_size self.patch_size = patch_size + self.max_length = max_length self.num_mel_bins = num_mel_bins self.is_training = is_training self.use_labels = use_labels @@ -104,6 +106,7 @@ def prepare_config_and_inputs(self): def get_config(self): return ASTConfig( patch_size=self.patch_size, + max_length=self.max_length, num_mel_bins=self.num_mel_bins, hidden_size=self.hidden_size, num_hidden_layers=self.num_hidden_layers, From 3e50e5c90134598d9f623e0e267ed4cd89c92245 Mon Sep 17 00:00:00 2001 From: Joao Gante Date: Thu, 15 Aug 2024 10:51:42 +0000 Subject: [PATCH 27/35] one more edge case --- tests/generation/test_utils.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/tests/generation/test_utils.py b/tests/generation/test_utils.py index 59d5036b8c8f..67d9e8741954 100644 --- a/tests/generation/test_utils.py +++ b/tests/generation/test_utils.py @@ -1566,6 +1566,10 @@ def test_generate_continue_from_past_key_values(self): # 3. ignore `token_type_ids` for simplicity # 4. ignore `forced_eos_token_id`, which requires further manipulation of the continuation inputs and is # active by default on some models + # 5. ignore `encoder_no_repeat_ngram_size`, which is set by default in some encoder-decoder models. 
When + # we use their decoder as a stand-alone model, `encoder_no_repeat_ngram_size` actually prevents + # repetition exclusively from the prompt. This test relies on comparing one call vs 2 calls + # with cache, what is considered a prompt is different in the two cases. config.use_cache = True if "token_type_ids" in inputs: del inputs["token_type_ids"] @@ -1574,6 +1578,7 @@ def test_generate_continue_from_past_key_values(self): model.eval() model.generation_config.pad_token_id = model.generation_config.eos_token_id = -1 model.generation_config.forced_eos_token_id = None + model.generation_config.encoder_no_repeat_ngram_size = 0 # If "past_key_values" is not returned, skip the test (e.g. RWKV uses a different cache name and format) outputs = model(**inputs) From 328c3f8bfdfb3fea9529afd357bd51e08004bcc1 Mon Sep 17 00:00:00 2001 From: Joao Gante Date: Thu, 15 Aug 2024 11:10:02 +0000 Subject: [PATCH 28/35] another one --- tests/models/mamba2/test_modeling_mamba2.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/tests/models/mamba2/test_modeling_mamba2.py b/tests/models/mamba2/test_modeling_mamba2.py index 276ecf2fd6b0..68ca0e072a7e 100644 --- a/tests/models/mamba2/test_modeling_mamba2.py +++ b/tests/models/mamba2/test_modeling_mamba2.py @@ -227,6 +227,10 @@ def test_multi_gpu_data_parallel_forward(self): def test_generate_from_inputs_embeds_decoder_only(self): pass + @unittest.skip(reason="To fix, Mamba 2 cache slicing test case is an edge case") + def test_inputs_embeds_matches_input_ids_with_generate(self): + pass + def test_model_outputs_equivalence(self): config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common() From aa2e93e1d9fd8d1fa6562e664d81800fd68856a5 Mon Sep 17 00:00:00 2001 From: Joao Gante Date: Thu, 15 Aug 2024 11:31:17 +0000 Subject: [PATCH 29/35] this one is flaky --- tests/models/mobilevit/test_modeling_mobilevit.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git 
a/tests/models/mobilevit/test_modeling_mobilevit.py b/tests/models/mobilevit/test_modeling_mobilevit.py index 9eb5878500d5..cd4cfa68e5dc 100644 --- a/tests/models/mobilevit/test_modeling_mobilevit.py +++ b/tests/models/mobilevit/test_modeling_mobilevit.py @@ -17,7 +17,7 @@ import unittest from transformers import MobileViTConfig -from transformers.testing_utils import require_torch, require_vision, slow, torch_device +from transformers.testing_utils import is_flaky, require_torch, require_vision, slow, torch_device from transformers.utils import cached_property, is_torch_available, is_vision_available from ...test_configuration_common import ConfigTester @@ -274,6 +274,10 @@ def test_model_from_pretrained(self): model = MobileViTModel.from_pretrained(model_name) self.assertIsNotNone(model) + @is_flaky(description="is_flaky https://github.com/huggingface/transformers/issues/29516") + def test_batching_equivalence(self): + super().test_batching_equivalence() + # We will verify our results on an image of cute cats def prepare_img(): From ac13bb1f7612588a17ce8bcab12e3d6810540cc8 Mon Sep 17 00:00:00 2001 From: Joao Gante Date: Fri, 16 Aug 2024 11:18:11 +0000 Subject: [PATCH 30/35] make fixup --- tests/models/mamba2/test_modeling_mamba2.py | 4 ---- 1 file changed, 4 deletions(-) diff --git a/tests/models/mamba2/test_modeling_mamba2.py b/tests/models/mamba2/test_modeling_mamba2.py index 68ca0e072a7e..276ecf2fd6b0 100644 --- a/tests/models/mamba2/test_modeling_mamba2.py +++ b/tests/models/mamba2/test_modeling_mamba2.py @@ -227,10 +227,6 @@ def test_multi_gpu_data_parallel_forward(self): def test_generate_from_inputs_embeds_decoder_only(self): pass - @unittest.skip(reason="To fix, Mamba 2 cache slicing test case is an edge case") - def test_inputs_embeds_matches_input_ids_with_generate(self): - pass - def test_model_outputs_equivalence(self): config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common() From 021785bfbc754d97a2a958bd6de6c4ccbd5bfffd Mon Sep 
17 00:00:00 2001 From: Joao Gante Date: Wed, 21 Aug 2024 09:32:12 +0000 Subject: [PATCH 31/35] accept storing None; moving sets to None --- src/transformers/configuration_utils.py | 16 +++++++++------- src/transformers/modeling_utils.py | 9 +++++---- tests/utils/test_modeling_utils.py | 22 ++++++++++++++++++---- 3 files changed, 32 insertions(+), 15 deletions(-) diff --git a/src/transformers/configuration_utils.py b/src/transformers/configuration_utils.py index 2fb5b8ff1a45..c6e3d90b9f0c 100755 --- a/src/transformers/configuration_utils.py +++ b/src/transformers/configuration_utils.py @@ -1037,15 +1037,17 @@ def _get_non_default_generation_parameters(self) -> Dict[str, Any]: for parameter_name, default_global_value in self._get_global_generation_defaults().items(): if hasattr(self_decoder_config, parameter_name): is_default_in_config = is_default_generation_value = None - # Two cases in which is okay for the model config to hold generation config parameters: - # 1. If we have a default config, then the instance should hold the same generation defaults + parameter_value = getattr(self_decoder_config, parameter_name) + # Three cases in which it is okay for the model config to hold generation config parameters: + # 1. The parameter is set to `None`, effectively delegating its value to the generation config + if parameter_value is None: + continue + # 2. If we have a default config, then the instance should hold the same generation defaults if default_config is not None: - is_default_in_config = getattr(self_decoder_config, parameter_name) == getattr( - default_config, parameter_name - ) - # 2. if we don't have a default config, then the instance should hold the global generation defaults + is_default_in_config = parameter_value == getattr(default_config, parameter_name) + # 3. 
if we don't have a default config, then the instance should hold the global generation defaults else: - is_default_generation_value = getattr(self_decoder_config, parameter_name) == default_global_value + is_default_generation_value = parameter_value == default_global_value is_non_default = (is_default_in_config is False) or ( is_default_in_config is None and is_default_generation_value is False diff --git a/src/transformers/modeling_utils.py b/src/transformers/modeling_utils.py index d8d706a97c1f..b943b5e7989f 100755 --- a/src/transformers/modeling_utils.py +++ b/src/transformers/modeling_utils.py @@ -2574,14 +2574,15 @@ def save_pretrained( # If the model config has set attributes that should be in the generation config, move them there. misplaced_generation_parameters = model_to_save.config._get_non_default_generation_parameters() if self.can_generate() and len(misplaced_generation_parameters) > 0: - logger.warning( + warnings.warn( "Moving the following attributes in the config to the generation config: " - f"{misplaced_generation_parameters}" + f"{misplaced_generation_parameters}. 
You are seeing this warning because you've set " + "generation parameters in the model config, as opposed to in the generation config.", + UserWarning, ) - default_generation_arguments = model_to_save.config._get_global_generation_defaults() for param_name, param_value in misplaced_generation_parameters.items(): setattr(model_to_save.generation_config, param_name, param_value) - setattr(model_to_save.config, param_name, default_generation_arguments[param_name]) + setattr(model_to_save.config, param_name, None) model_to_save.config.save_pretrained(save_directory) if self.can_generate(): diff --git a/tests/utils/test_modeling_utils.py b/tests/utils/test_modeling_utils.py index f90d18b09553..238a9a1fe4d6 100644 --- a/tests/utils/test_modeling_utils.py +++ b/tests/utils/test_modeling_utils.py @@ -23,6 +23,7 @@ import unittest import unittest.mock as mock import uuid +import warnings from pathlib import Path import requests @@ -1605,11 +1606,24 @@ def test_modifying_model_config_gets_moved_to_generation_config(self): to the generation config. """ model = AutoModelForCausalLM.from_pretrained("openai-community/gpt2") + # Initially, the repetition penalty has its default value in `model.config`. The `model.generation_config` will + # have the exact same default + self.assertTrue(model.config.repetition_penalty == 1.0) + self.assertTrue(model.generation_config.repetition_penalty == 1.0) + # If the user attempts to save a custom generation parameter: model.config.repetition_penalty = 3.0 - with tempfile.TemporaryDirectory() as tmp_dir: - model.save_pretrained(tmp_dir) - self.assertTrue(model.config.repetition_penalty != 3.0) - self.assertTrue(model.generation_config.repetition_penalty == 3.0) + with warnings.catch_warnings(record=True) as warning_list: + with tempfile.TemporaryDirectory() as tmp_dir: + model.save_pretrained(tmp_dir) + # 1 - That parameter will be removed from `model.config`. 
We don't want to use `model.config` to store + # generative parameters, and the old default (1.0) would no longer reflect the user's wishes. + self.assertTrue(model.config.repetition_penalty is None) + # 2 - That parameter will be set in `model.generation_config` instead. + self.assertTrue(model.generation_config.repetition_penalty == 3.0) + # 3 - The user will see a warning regarding the custom parameter that has been moved. + self.assertTrue(len(warning_list) == 1) + self.assertTrue("Moving the following attributes" in str(warning_list[0].message)) + self.assertTrue("repetition_penalty" in str(warning_list[0].message)) @require_safetensors def test_model_from_pretrained_from_mlx(self): From 747bb1d3d371a7260f2d1ad137e7bd7263f997d5 Mon Sep 17 00:00:00 2001 From: Joao Gante Date: Wed, 21 Aug 2024 10:48:14 +0100 Subject: [PATCH 32/35] Update src/transformers/models/rag/configuration_rag.py Co-authored-by: amyeroberts <22614925+amyeroberts@users.noreply.github.com> --- src/transformers/models/rag/configuration_rag.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/transformers/models/rag/configuration_rag.py b/src/transformers/models/rag/configuration_rag.py index d1fab393557b..98de7382a456 100644 --- a/src/transformers/models/rag/configuration_rag.py +++ b/src/transformers/models/rag/configuration_rag.py @@ -127,7 +127,7 @@ def __init__( if "question_encoder" not in kwargs or "generator" not in kwargs: raise ValueError( f"A configuraton of type {self.model_type} cannot be instantiated because " - f"not both `question_encoder` and `generator` sub-configurations are passed, but only {kwargs}" + f"both `question_encoder` and `generator` sub-configurations were not passed, only {kwargs}" ) question_encoder_config = kwargs.pop("question_encoder") question_encoder_model_type = question_encoder_config.pop("model_type") From 9f91b5d5132c06305d18a69168049b25ff477b5c Mon Sep 17 00:00:00 2001 From: Joao Gante Date: Wed, 21 Aug 2024 10:48:23 +0100 Subject: 
[PATCH 33/35] Update src/transformers/models/encoder_decoder/configuration_encoder_decoder.py Co-authored-by: amyeroberts <22614925+amyeroberts@users.noreply.github.com> --- .../models/encoder_decoder/configuration_encoder_decoder.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/transformers/models/encoder_decoder/configuration_encoder_decoder.py b/src/transformers/models/encoder_decoder/configuration_encoder_decoder.py index 62a71cd72b15..ab5d49b32fea 100644 --- a/src/transformers/models/encoder_decoder/configuration_encoder_decoder.py +++ b/src/transformers/models/encoder_decoder/configuration_encoder_decoder.py @@ -77,7 +77,7 @@ def __init__(self, **kwargs): if "encoder" not in kwargs or "decoder" not in kwargs: raise ValueError( f"A configuraton of type {self.model_type} cannot be instantiated because " - f"not both `encoder` and `decoder` sub-configurations are passed, but only {kwargs}" + f"both `encoder` and `decoder` sub-configurations were not passed, only {kwargs}" ) encoder_config = kwargs.pop("encoder") encoder_model_type = encoder_config.pop("model_type") From 146ea02476533dd2f2a4890aee834ed0d1a505f8 Mon Sep 17 00:00:00 2001 From: Joao Gante Date: Thu, 22 Aug 2024 11:53:17 +0000 Subject: [PATCH 34/35] remove CI TF examples fix (moved to #32935) --- .circleci/create_circleci_config.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/.circleci/create_circleci_config.py b/.circleci/create_circleci_config.py index 2e0af429a207..d783488caecc 100644 --- a/.circleci/create_circleci_config.py +++ b/.circleci/create_circleci_config.py @@ -327,8 +327,7 @@ def job_name(self): cache_name="tensorflow_examples", docker_image=[{"image":"huggingface/transformers-examples-tf"}], install_steps=["uv venv && uv pip install . 
&& uv pip install -r examples/tensorflow/_tests_requirements.txt"], - parallelism=8, - pytest_num_workers=1 + parallelism=8 ) From e9a9a8225e605af7beb38d2dc1f162498950dc94 Mon Sep 17 00:00:00 2001 From: Joao Gante Date: Fri, 23 Aug 2024 09:25:00 +0000 Subject: [PATCH 35/35] [test_all] trigger all tests