diff --git a/vllm/transformers_utils/configs/olmo_hybrid.py b/vllm/transformers_utils/configs/olmo_hybrid.py index 1087124c706f..2a60f29025a0 100644 --- a/vllm/transformers_utils/configs/olmo_hybrid.py +++ b/vllm/transformers_utils/configs/olmo_hybrid.py @@ -2,7 +2,7 @@ # SPDX-FileCopyrightText: Copyright contributors to the vLLM project -from transformers.configuration_utils import PretrainedConfig, layer_type_validation +from transformers.configuration_utils import PretrainedConfig class OlmoHybridConfig(PretrainedConfig): @@ -228,7 +228,15 @@ def __init__( if "full_attention" not in layer_types: layer_types[-1] = "full_attention" - layer_type_validation(layer_types, num_hidden_layers) + if hasattr(self, "validate_layer_type"): + # Transformers v5 + self.layer_types = layer_types + self.validate_layer_type() + else: + # Transformers v4 + from transformers.configuration_utils import layer_type_validation + + layer_type_validation(layer_types, num_hidden_layers) if "linear_attention" not in layer_types: raise ValueError( "OLMoHybrid expects at least one 'linear_attention' layer." diff --git a/vllm/transformers_utils/configs/qwen3_5.py b/vllm/transformers_utils/configs/qwen3_5.py index 9d43986a6e4d..3192e5e9a166 100644 --- a/vllm/transformers_utils/configs/qwen3_5.py +++ b/vllm/transformers_utils/configs/qwen3_5.py @@ -16,7 +16,7 @@ # limitations under the License. """Qwen3.5 model configuration""" -from transformers.configuration_utils import PretrainedConfig, layer_type_validation +from transformers.configuration_utils import PretrainedConfig class Qwen3_5TextConfig(PretrainedConfig): @@ -68,10 +68,6 @@ def __init__( eos_token_id=None, **kwargs, ): - kwargs["ignore_keys_at_rope_validation"] = [ - "mrope_section", - "mrope_interleaved", - ] self.vocab_size = vocab_size self.max_position_embeddings = max_position_embeddings self.hidden_size = hidden_size @@ -98,7 +94,18 @@ def __init__( else "full_attention" for i in range(self.num_hidden_layers) ] - layer_type_validation(self.layer_types, self.num_hidden_layers) + if hasattr(self, "validate_layer_type"): + # Transformers v5 + kwargs["ignore_keys_at_rope_validation"] = { + "mrope_section", + "mrope_interleaved", + } + self.validate_layer_type() + else: + # Transformers v4 + from transformers.configuration_utils import layer_type_validation + + layer_type_validation(self.layer_types, self.num_hidden_layers) # linear attention part self.linear_conv_kernel_dim = linear_conv_kernel_dim diff --git a/vllm/transformers_utils/configs/qwen3_5_moe.py b/vllm/transformers_utils/configs/qwen3_5_moe.py index 41a1f7ed90e3..9d9987ce03ee 100644 --- a/vllm/transformers_utils/configs/qwen3_5_moe.py +++ b/vllm/transformers_utils/configs/qwen3_5_moe.py @@ -16,7 +16,7 @@ # limitations under the License. """Qwen3.5-MoE model configuration""" -from transformers.configuration_utils import PretrainedConfig, layer_type_validation +from transformers.configuration_utils import PretrainedConfig class Qwen3_5MoeTextConfig(PretrainedConfig): @@ -75,10 +75,6 @@ def __init__( eos_token_id=None, **kwargs, ): - kwargs["ignore_keys_at_rope_validation"] = [ - "mrope_section", - "mrope_interleaved", - ] self.vocab_size = vocab_size self.max_position_embeddings = max_position_embeddings self.hidden_size = hidden_size @@ -104,7 +100,18 @@ def __init__( else "full_attention" for i in range(self.num_hidden_layers) ] - layer_type_validation(self.layer_types, self.num_hidden_layers) + if hasattr(self, "validate_layer_type"): + # Transformers v5 + kwargs["ignore_keys_at_rope_validation"] = { + "mrope_section", + "mrope_interleaved", + } + self.validate_layer_type() + else: + # Transformers v4 + from transformers.configuration_utils import layer_type_validation + + layer_type_validation(self.layer_types, self.num_hidden_layers) # linear attention part self.linear_conv_kernel_dim = linear_conv_kernel_dim diff --git a/vllm/transformers_utils/configs/qwen3_next.py b/vllm/transformers_utils/configs/qwen3_next.py index 8230a18343c5..a49a26378d2c 100644 --- a/vllm/transformers_utils/configs/qwen3_next.py +++ b/vllm/transformers_utils/configs/qwen3_next.py @@ -16,7 +16,7 @@ # limitations under the License. """Qwen3-Next model configuration""" -from transformers.configuration_utils import PretrainedConfig, layer_type_validation +from transformers.configuration_utils import PretrainedConfig from transformers.utils import logging logger = logging.get_logger(__name__) @@ -253,7 +253,14 @@ def __init__( "linear_attention" if bool((i + 1) % 4) else "full_attention" for i in range(self.num_hidden_layers) ] - layer_type_validation(self.layer_types) + if hasattr(self, "validate_layer_type"): + # Transformers v5 + self.validate_layer_type() + else: + # Transformers v4 + from transformers.configuration_utils import layer_type_validation + + layer_type_validation(self.layer_types) # linear attention part self.linear_conv_kernel_dim = linear_conv_kernel_dim