diff --git a/setup.py b/setup.py index 2b74308081ef..cb3e90adcd8d 100644 --- a/setup.py +++ b/setup.py @@ -117,7 +117,7 @@ "GitPython<3.1.19", "hf-doc-builder>=0.3.0", "hf_xet", - "huggingface-hub>=0.30.0,<1.0", + "huggingface-hub>=0.31.4,<1.0", "importlib_metadata", "ipadic>=1.0.0,<2.0", "isort>=5.5.4", diff --git a/src/transformers/configuration_utils.py b/src/transformers/configuration_utils.py index 205a7dde8f28..4a8b72ee44d4 100755 --- a/src/transformers/configuration_utils.py +++ b/src/transformers/configuration_utils.py @@ -208,7 +208,19 @@ def __getattribute__(self, key): key = super().__getattribute__("attribute_map")[key] return super().__getattribute__(key) + def __post_init__(self): + self._set_defaults() + def __init__(self, **kwargs): + self._set_defaults(**kwargs) + + def _set_defaults(self, **kwargs): + def _get_default_if_unset(attribute_name, default_value): + set_attribute = getattr(self, attribute_name, None) + if set_attribute is not None: + return set_attribute + return kwargs.pop(attribute_name, default_value) + # Attributes with defaults self.return_dict = kwargs.pop("return_dict", True) self.output_hidden_states = kwargs.pop("output_hidden_states", False) @@ -267,12 +279,11 @@ def __init__(self, **kwargs): # Tokenizer arguments TODO: eventually tokenizer and models should share the same config self.tokenizer_class = kwargs.pop("tokenizer_class", None) self.prefix = kwargs.pop("prefix", None) - self.bos_token_id = kwargs.pop("bos_token_id", None) - self.pad_token_id = kwargs.pop("pad_token_id", None) - self.eos_token_id = kwargs.pop("eos_token_id", None) - self.sep_token_id = kwargs.pop("sep_token_id", None) - - self.decoder_start_token_id = kwargs.pop("decoder_start_token_id", None) + self.bos_token_id = _get_default_if_unset("bos_token_id", None) + self.pad_token_id = _get_default_if_unset("pad_token_id", None) + self.eos_token_id = _get_default_if_unset("eos_token_id", None) + self.sep_token_id = _get_default_if_unset("sep_token_id", 
None) + self.decoder_start_token_id = _get_default_if_unset("decoder_start_token_id", None) # task specific arguments self.task_specific_params = kwargs.pop("task_specific_params", None) @@ -382,6 +393,29 @@ def _attn_implementation(self): def _attn_implementation(self, value): self._attn_implementation_internal = value + @property + def attn_implementation(self): + return self._attn_implementation + + @attn_implementation.setter + def attn_implementation(self, value): + self._attn_implementation = value + + def validate_token_ids(self): + """Part of `@strict`-powered validation. Validates the contents of the special tokens.""" + text_config = self.get_text_config() + vocab_size = getattr(text_config, "vocab_size", None) + if vocab_size is not None: + for token_name in ["pad_token_id", "bos_token_id", "eos_token_id"]: + token_id = getattr(text_config, token_name, None) + if token_id is not None and not 0 <= token_id < vocab_size: + # Can't be an exception until we can load configs that fail validation: several configs on the Hub + # store invalid special tokens, e.g. `pad_token_id=-1` + logger.warning_once( + f"Model config: {token_name} must be `None` or an integer within the vocabulary (between 0 " + f"and {vocab_size - 1}), got {token_id}. This may result in unexpected behavior." 
+ ) + def save_pretrained(self, save_directory: Union[str, os.PathLike], push_to_hub: bool = False, **kwargs): """ Save a configuration object to the directory `save_directory`, so that it can be re-loaded using the @@ -414,6 +448,10 @@ def save_pretrained(self, save_directory: Union[str, os.PathLike], push_to_hub: UserWarning, ) + # Strict validation at save-time: prevent bad patterns from propagating + if hasattr(self, "validate"): + self.validate() + os.makedirs(save_directory, exist_ok=True) if push_to_hub: diff --git a/src/transformers/dependency_versions_table.py b/src/transformers/dependency_versions_table.py index 5c0ae6b772f3..625fd04eee47 100644 --- a/src/transformers/dependency_versions_table.py +++ b/src/transformers/dependency_versions_table.py @@ -24,7 +24,7 @@ "GitPython": "GitPython<3.1.19", "hf-doc-builder": "hf-doc-builder>=0.3.0", "hf_xet": "hf_xet", - "huggingface-hub": "huggingface-hub>=0.30.0,<1.0", + "huggingface-hub": "huggingface-hub>=0.31.4,<1.0", "importlib_metadata": "importlib_metadata", "ipadic": "ipadic>=1.0.0,<2.0", "isort": "isort>=5.5.4", diff --git a/src/transformers/modeling_utils.py b/src/transformers/modeling_utils.py index bd09c1ae57d1..42aa36210d73 100644 --- a/src/transformers/modeling_utils.py +++ b/src/transformers/modeling_utils.py @@ -2025,6 +2025,10 @@ def __init__(self, config: PretrainedConfig, *inputs, **kwargs): "`PretrainedConfig`. 
To create a model from a pretrained model use " f"`model = {self.__class__.__name__}.from_pretrained(PRETRAINED_MODEL_NAME)`" ) + # class-level validation of config (as opposed to the attribute-level validation provided by `@strict`) + if hasattr(config, "validate"): + config.validate() + if not getattr(config, "_attn_implementation_autoset", False): # config usually has a `torch_dtype` but we need the next line for the `no_super_init` tests dtype = config.torch_dtype if hasattr(config, "torch_dtype") else torch.get_default_dtype() diff --git a/src/transformers/models/albert/configuration_albert.py b/src/transformers/models/albert/configuration_albert.py index e1e2d4547cc4..6486484d3c60 100644 --- a/src/transformers/models/albert/configuration_albert.py +++ b/src/transformers/models/albert/configuration_albert.py @@ -16,12 +16,18 @@ """ALBERT model configuration""" from collections import OrderedDict -from typing import Mapping +from dataclasses import dataclass +from typing import Literal, Mapping, Optional, Union + +from huggingface_hub.dataclasses import strict from ...configuration_utils import PretrainedConfig from ...onnx import OnnxConfig +from ...validators import activation_fn_key, interval, probability +@strict(accept_kwargs=True) +@dataclass class AlbertConfig(PretrainedConfig): r""" This is the configuration class to store the configuration of a [`AlbertModel`] or a [`TFAlbertModel`]. It is used @@ -53,9 +59,9 @@ class AlbertConfig(PretrainedConfig): hidden_act (`str` or `Callable`, *optional*, defaults to `"gelu_new"`): The non-linear activation function (function or string) in the encoder and pooler. If string, `"gelu"`, `"relu"`, `"silu"` and `"gelu_new"` are supported. - hidden_dropout_prob (`float`, *optional*, defaults to 0): + hidden_dropout_prob (`float`, *optional*, defaults to 0.0): The dropout probability for all fully connected layers in the embeddings, encoder, and pooler. 
- attention_probs_dropout_prob (`float`, *optional*, defaults to 0): + attention_probs_dropout_prob (`float`, *optional*, defaults to 0.0): The dropout ratio for the attention probabilities. max_position_embeddings (`int`, *optional*, defaults to 512): The maximum sequence length that this model might ever be used with. Typically set this to something large @@ -103,51 +109,37 @@ class AlbertConfig(PretrainedConfig): >>> configuration = model.config ```""" + vocab_size: int = interval(min=1)(default=30000) + embedding_size: int = interval(min=1)(default=128) + hidden_size: int = interval(min=1)(default=4096) + num_hidden_layers: int = interval(min=1)(default=12) + num_hidden_groups: int = interval(min=1)(default=1) + num_attention_heads: int = interval(min=0)(default=64) + intermediate_size: int = interval(min=1)(default=16384) + inner_group_num: int = interval(min=0)(default=1) + hidden_act: str = activation_fn_key(default="gelu_new") + hidden_dropout_prob: Union[float, int] = probability(default=0.0) + attention_probs_dropout_prob: Union[float, int] = probability(default=0.0) + max_position_embeddings: int = interval(min=0)(default=512) + type_vocab_size: int = interval(min=1)(default=2) + initializer_range: float = interval(min=0.0)(default=0.02) + layer_norm_eps: float = interval(min=0.0)(default=1e-12) + classifier_dropout_prob: float = probability(default=0.1) + position_embedding_type: Literal["absolute", "relative_key", "relative_key_query"] = "absolute" + pad_token_id: Optional[int] = 0 + bos_token_id: Optional[int] = 2 + eos_token_id: Optional[int] = 3 + + # Not part of __init__ model_type = "albert" - def __init__( - self, - vocab_size=30000, - embedding_size=128, - hidden_size=4096, - num_hidden_layers=12, - num_hidden_groups=1, - num_attention_heads=64, - intermediate_size=16384, - inner_group_num=1, - hidden_act="gelu_new", - hidden_dropout_prob=0, - attention_probs_dropout_prob=0, - max_position_embeddings=512, - type_vocab_size=2, - 
initializer_range=0.02, - layer_norm_eps=1e-12, - classifier_dropout_prob=0.1, - position_embedding_type="absolute", - pad_token_id=0, - bos_token_id=2, - eos_token_id=3, - **kwargs, - ): - super().__init__(pad_token_id=pad_token_id, bos_token_id=bos_token_id, eos_token_id=eos_token_id, **kwargs) - - self.vocab_size = vocab_size - self.embedding_size = embedding_size - self.hidden_size = hidden_size - self.num_hidden_layers = num_hidden_layers - self.num_hidden_groups = num_hidden_groups - self.num_attention_heads = num_attention_heads - self.inner_group_num = inner_group_num - self.hidden_act = hidden_act - self.intermediate_size = intermediate_size - self.hidden_dropout_prob = hidden_dropout_prob - self.attention_probs_dropout_prob = attention_probs_dropout_prob - self.max_position_embeddings = max_position_embeddings - self.type_vocab_size = type_vocab_size - self.initializer_range = initializer_range - self.layer_norm_eps = layer_norm_eps - self.classifier_dropout_prob = classifier_dropout_prob - self.position_embedding_type = position_embedding_type + def validate_architecture(self): + """Part of `@strict`-powered validation. Validates the architecture of the config.""" + if self.hidden_size % self.num_attention_heads != 0: + raise ValueError( + f"The hidden size ({self.hidden_size}) is not a multiple of the number of attention " + f"heads ({self.num_attention_heads})." 
+ ) # Copied from transformers.models.bert.configuration_bert.BertOnnxConfig with Roberta->Albert diff --git a/src/transformers/models/albert/modeling_albert.py b/src/transformers/models/albert/modeling_albert.py index 11fd1f939ccd..640bc1a56014 100755 --- a/src/transformers/models/albert/modeling_albert.py +++ b/src/transformers/models/albert/modeling_albert.py @@ -245,12 +245,6 @@ def forward( class AlbertAttention(nn.Module): def __init__(self, config: AlbertConfig): super().__init__() - if config.hidden_size % config.num_attention_heads != 0 and not hasattr(config, "embedding_size"): - raise ValueError( - f"The hidden size ({config.hidden_size}) is not a multiple of the number of attention " - f"heads ({config.num_attention_heads}" - ) - self.num_attention_heads = config.num_attention_heads self.hidden_size = config.hidden_size self.attention_head_size = config.hidden_size // config.num_attention_heads diff --git a/src/transformers/validators.py b/src/transformers/validators.py new file mode 100644 index 000000000000..3373619bc358 --- /dev/null +++ b/src/transformers/validators.py @@ -0,0 +1,106 @@ +# coding=utf-8 +# Copyright 2025-present the HuggingFace Inc. team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +""" +Validators to be used with `huggingface_hub.dataclasses.validated_field`. We recommend using the validator(s) that best +describe the constraints of your dataclass fields, for the best user experience (e.g. better error messages). 
+"""
+
+from typing import Callable, Optional, Union
+
+from huggingface_hub.dataclasses import as_validated_field
+
+from .utils import is_torch_available
+
+
+if is_torch_available():
+    from .activations import ACT2FN
+else:
+    ACT2FN = {}
+
+
+# Numerical validators
+
+
+def interval(
+    min: Optional[Union[int, float]] = None,
+    max: Optional[Union[int, float]] = None,
+    exclude_min: bool = False,
+    exclude_max: bool = False,
+) -> Callable:
+    """
+    Parameterized validator that ensures that `value` is within the defined interval. Optionally, the interval can be
+    open on either side. Expected usage: `interval(min=0)(default=8)`
+
+    Args:
+        min (`int` or `float`, *optional*):
+            Minimum value of the interval. `None` leaves the interval unbounded below; `0` is a regular bound.
+        max (`int` or `float`, *optional*):
+            Maximum value of the interval. `None` leaves the interval unbounded above; `0` is a regular bound.
+        exclude_min (`bool`, *optional*, defaults to `False`):
+            If True, the minimum value is excluded from the interval.
+        exclude_max (`bool`, *optional*, defaults to `False`):
+            If True, the maximum value is excluded from the interval.
+    """
+    error_message = "Value must be"
+    if min is not None:
+        if exclude_min:
+            error_message += f" greater than {min}"
+        else:
+            error_message += f" greater or equal to {min}"
+    if min is not None and max is not None:
+        error_message += " and"
+    if max is not None:
+        if exclude_max:
+            error_message += f" smaller than {max}"
+        else:
+            error_message += f" smaller or equal to {max}"
+    error_message += ", got {value}."
+
+    min = float("-inf") if min is None else min  # not `min or ...`: a bound of 0 is falsy and would be dropped
+    max = float("inf") if max is None else max  # same reasoning for an upper bound of 0
+
+    @as_validated_field
+    def _inner(value: Union[int, float]):
+        min_valid = min <= value if not exclude_min else min < value
+        max_valid = value <= max if not exclude_max else value < max
+        if not (min_valid and max_valid):
+            raise ValueError(error_message.format(value=value))
+
+    return _inner
+
+
+@as_validated_field
+def probability(value: float):
+    """Ensures that `value` is a valid probability number, i.e. 
[0,1].""" + if not 0 <= value <= 1: + raise ValueError(f"Value must be a probability between 0.0 and 1.0, got {value}.") + + +# String validators + + +@as_validated_field +def activation_fn_key(value: str): + """Ensures that `value` is a string corresponding to an activation function.""" + # TODO (joao): in python 3.11+, we can build a Literal type from the keys of ACT2FN + if len(ACT2FN) > 0: # don't validate if we can't import ACT2FN + if value not in ACT2FN: + raise ValueError( + f"Value must be one of {list(ACT2FN.keys())}, got {value}. " + "Make sure to use a string that corresponds to an activation function." + ) + + +__all__ = ["interval", "probability", "activation_fn_key"] diff --git a/tests/models/albert/test_modeling_albert.py b/tests/models/albert/test_modeling_albert.py index 349a809b07c8..4dddf0ec5b5a 100644 --- a/tests/models/albert/test_modeling_albert.py +++ b/tests/models/albert/test_modeling_albert.py @@ -275,7 +275,7 @@ def _prepare_for_class(self, inputs_dict, model_class, return_labels=False): def setUp(self): self.model_tester = AlbertModelTester(self) - self.config_tester = ConfigTester(self, config_class=AlbertConfig, hidden_size=37) + self.config_tester = ConfigTester(self, config_class=AlbertConfig, hidden_size=64) def test_config(self): self.config_tester.run_common_tests() diff --git a/tests/test_configuration_common.py b/tests/test_configuration_common.py index 4d4ce3a3f165..fcf559d8ae43 100644 --- a/tests/test_configuration_common.py +++ b/tests/test_configuration_common.py @@ -53,11 +53,14 @@ def create_and_test_config_common_properties(self): self.parent.assertTrue(hasattr(config, prop), msg=f"`{prop}` does not exist") # Test that config has the common properties as setter - for idx, name in enumerate(common_properties): + dummy_value = 64 + for name in common_properties: try: - setattr(config, name, idx) + setattr(config, name, dummy_value) self.parent.assertEqual( - getattr(config, name), idx, msg=f"`{name} value {idx} expected, 
but was {getattr(config, name)}" + getattr(config, name), + dummy_value, + msg=f"`{name} value {dummy_value} expected, but was {getattr(config, name)}", ) except NotImplementedError: # Some models might not be able to implement setters for common_properties @@ -65,11 +68,13 @@ def create_and_test_config_common_properties(self): pass # Test if config class can be called with Config(prop_name=..) - for idx, name in enumerate(common_properties): + for name in common_properties: try: - config = self.config_class(**{name: idx}) + config = self.config_class(**{name: dummy_value}) self.parent.assertEqual( - getattr(config, name), idx, msg=f"`{name} value {idx} expected, but was {getattr(config, name)}" + getattr(config, name), + dummy_value, + msg=f"`{name} value {dummy_value} expected, but was {getattr(config, name)}", ) except NotImplementedError: # Some models might not be able to implement setters for common_properties diff --git a/tests/utils/test_validators.py b/tests/utils/test_validators.py new file mode 100644 index 000000000000..58f352e2228d --- /dev/null +++ b/tests/utils/test_validators.py @@ -0,0 +1,176 @@ +# Copyright 2025 HuggingFace Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+import tempfile +import unittest +from dataclasses import dataclass +from typing import Union + +from huggingface_hub.dataclasses import ( + StrictDataclassClassValidationError, + StrictDataclassFieldValidationError, + strict, +) + +from transformers import AlbertConfig, logging +from transformers.testing_utils import CaptureLogger +from transformers.validators import activation_fn_key, interval, probability + + +class ValidatorsTests(unittest.TestCase): + """ + Sanity check tests for the validators. Validators are `field` in a dataclass, and not meant to be used on + their own. + """ + + def test_interval(self): + # Setup test dataclasses + @strict + @dataclass + class TestInterval: + data: Union[int, float] = interval(min=1, max=10)() + + @strict + @dataclass + class TestIntervalExcludeMinMax: + data: Union[int, float] = interval(min=1, max=10, exclude_min=True, exclude_max=True)() + + # valid + TestInterval(5) + TestInterval(5.0) + TestInterval(10) + TestInterval(1) + TestIntervalExcludeMinMax(1.0000001) + + # invalid + with self.assertRaises(StrictDataclassFieldValidationError): + TestInterval("one") # different type + with self.assertRaises(StrictDataclassFieldValidationError): + TestInterval(11) # greater than max + with self.assertRaises(StrictDataclassFieldValidationError): + TestInterval(0.9999999) # less than min + with self.assertRaises(StrictDataclassFieldValidationError): + TestIntervalExcludeMinMax(10) # equal to max, but exclude_max is True + with self.assertRaises(StrictDataclassFieldValidationError): + TestIntervalExcludeMinMax(1.0) # equal to min, but exclude_min is True + with self.assertRaises(StrictDataclassFieldValidationError): + TestInterval(-5) # less than min + + def test_probability(self): + # Setup test dataclasses + @strict + @dataclass + class TestProbability: + data: float = probability() + + # valid + TestProbability(0.5) + TestProbability(0.0) + TestProbability(1.0) + + # invalid + with 
self.assertRaises(StrictDataclassFieldValidationError): + TestProbability(1) # different type + with self.assertRaises(StrictDataclassFieldValidationError): + TestProbability(99.0) # 0-1 probabilities only + with self.assertRaises(StrictDataclassFieldValidationError): + TestProbability(1.1) # greater than 1 + with self.assertRaises(StrictDataclassFieldValidationError): + TestProbability(-0.1) # less than 0 + + def test_activation_fn_key(self): + # Setup test dataclasses + @strict + @dataclass + class TestActivationFnKey: + data: str = activation_fn_key() + + # valid + TestActivationFnKey("relu") + TestActivationFnKey("gelu") + + # invalid + with self.assertRaises(StrictDataclassFieldValidationError): + TestActivationFnKey("foo") # obvious one + with self.assertRaises(StrictDataclassFieldValidationError): + TestActivationFnKey(None) # different type: can't be None + with self.assertRaises(StrictDataclassFieldValidationError): + TestActivationFnKey("Relu") # typo: should be "relu", not "Relu" + + +class ValidatorsIntegrationTests(unittest.TestCase): + """Tests in which the validators are used as part of another class/function""" + + def test_model_config_validation(self): + """Sanity check tests for the integration of model config with `huggingface_hub.dataclasses.strict`""" + # 1 - We can initialize the config, including with arbitrary kwargs + config = AlbertConfig() + config = AlbertConfig(eos_token_id=5) + self.assertEqual(config.eos_token_id, 5) + config = AlbertConfig(eos_token_id=None) + self.assertIsNone(config.eos_token_id) + config = AlbertConfig(foo="bar") # Ensures backwards compatibility + self.assertEqual(config.foo, "bar") + + # 2 - Manual specification, traveling through an invalid config, should be allowed + config.hidden_size = 65 # breaks class-wide validation, see `AlbertConfig.validate_architecture` + with self.assertRaises(StrictDataclassClassValidationError): + config.validate() + config.num_attention_heads = 5 # 65 % 5 = 0 -> valid + 
config.validate() + + # 3 - These cases should raise an error + + # vocab_size is an int + with self.assertRaises(StrictDataclassFieldValidationError): + config = AlbertConfig(vocab_size=10.0) + + # num_hidden_layers is an int + with self.assertRaises(StrictDataclassFieldValidationError): + config = AlbertConfig(num_hidden_layers=None) + + # position_embedding_type is a Literal, foo is not one of the options + with self.assertRaises(StrictDataclassFieldValidationError): + config = AlbertConfig(position_embedding_type="foo") + + # `@strict` calls `validate()` in `__post_init__`, i.e. after `__init__`. All functions defined as + # `validate_XXX(self)` will be called as part of the validation process. In this case, a special token must + # be in the vocabulary, and the validation function is defined in the base config class. + logger = logging.get_logger("transformers.configuration_utils") + with CaptureLogger(logger) as captured_logs: + config = AlbertConfig(vocab_size=10, eos_token_id=99) + self.assertIn("eos_token_id must be `None` or an integer within the vocabulary", captured_logs.out) + + # Similar to the previous case, but the validation function is defined in the model config class. The hidden + # size must be divisible by the number of attention heads. 
+        with self.assertRaises(StrictDataclassClassValidationError):
+            config = AlbertConfig(hidden_size=10, num_attention_heads=3)
+
+        # vocab size is assigned after init, individual attributes are checked on assignment
+        with self.assertRaises(StrictDataclassFieldValidationError):
+            config = AlbertConfig()
+            config.vocab_size = "foo"
+
+    def test_bad_config_cant_be_saved(self):
+        """Test that `save_pretrained` refuses to write a config that fails class-wide validation"""
+        # 1 - create a good config, modify it so it fails class-wide validation
+        config = AlbertConfig()
+        config.validate()
+        config.hidden_size = 65  # breaks class-wide validation, see `AlbertConfig.validate_architecture`
+
+        # 2 - try to save it, and check that the error message is correct
+        with tempfile.TemporaryDirectory() as tmp_dir:
+            with self.assertRaises(StrictDataclassClassValidationError) as exc:
+                config.save_pretrained(tmp_dir)
+            # start of the message in `AlbertConfig.validate_architecture`; `assertIn` shows both operands on failure
+            self.assertIn("The hidden size ", str(exc.exception))