diff --git a/vllm/model_executor/models/ernie45_moe.py b/vllm/model_executor/models/ernie45_moe.py
index 452c7624dcc0..f038cfb21f28 100644
--- a/vllm/model_executor/models/ernie45_moe.py
+++ b/vllm/model_executor/models/ernie45_moe.py
@@ -421,7 +421,6 @@ def __init__(self, *, vllm_config: VllmConfig, prefix: str = ""):
         cache_config = vllm_config.cache_config
         quant_config = vllm_config.quant_config
 
-        self.padding_idx = config.pad_token_id
         self.vocab_size = config.vocab_size
         self.config = config
         parallel_config = vllm_config.parallel_config
diff --git a/vllm/model_executor/models/ernie45_vl_moe.py b/vllm/model_executor/models/ernie45_vl_moe.py
index 9d3cbbecff17..376de71ada1e 100644
--- a/vllm/model_executor/models/ernie45_vl_moe.py
+++ b/vllm/model_executor/models/ernie45_vl_moe.py
@@ -523,7 +523,6 @@ def __init__(self, *, vllm_config: VllmConfig, prefix: str = ""):
         cache_config = vllm_config.cache_config
         quant_config = vllm_config.quant_config
 
-        self.padding_idx = config.pad_token_id
         self.vocab_size = config.vocab_size
         self.config = config
 
diff --git a/vllm/model_executor/models/granitemoeshared.py b/vllm/model_executor/models/granitemoeshared.py
index 93e8698149d6..7abc682c58e5 100644
--- a/vllm/model_executor/models/granitemoeshared.py
+++ b/vllm/model_executor/models/granitemoeshared.py
@@ -157,7 +157,6 @@ def __init__(self, *, vllm_config: VllmConfig, prefix: str = ""):
 
         self.config = config
         self.quant_config = quant_config  # Required by MixtralModel
-        self.padding_idx = config.pad_token_id
         self.vocab_size = config.vocab_size
 
diff --git a/vllm/model_executor/models/grok1.py b/vllm/model_executor/models/grok1.py
index e2943b7978b4..0bd6a8f3d606 100644
--- a/vllm/model_executor/models/grok1.py
+++ b/vllm/model_executor/models/grok1.py
@@ -451,7 +451,6 @@ def __init__(
 
         self.config = config
         self.quant_config = quant_config
-        self.padding_idx = config.pad_token_id
 
         # Store expert naming for weight loading
         self.ckpt_gate_proj_name = ckpt_gate_proj_name
diff --git a/vllm/model_executor/models/hunyuan_v1.py b/vllm/model_executor/models/hunyuan_v1.py
index a07bea16ce5e..584645f1fbf1 100644
--- a/vllm/model_executor/models/hunyuan_v1.py
+++ b/vllm/model_executor/models/hunyuan_v1.py
@@ -600,7 +600,6 @@ def __init__(self, *, vllm_config: VllmConfig, prefix: str = ""):
 
         self.config = config
         self.quant_config = quant_config
-        self.padding_idx = config.pad_token_id
         self.vocab_size = config.vocab_size
 
diff --git a/vllm/model_executor/models/jais2.py b/vllm/model_executor/models/jais2.py
index ea06ee1b1c7a..4e03eb12ee44 100644
--- a/vllm/model_executor/models/jais2.py
+++ b/vllm/model_executor/models/jais2.py
@@ -305,7 +305,6 @@ def __init__(
 
         self.config = config
         self.quant_config = quant_config
-        self.padding_idx = config.pad_token_id
         self.vocab_size = config.vocab_size
         self.org_vocab_size = config.vocab_size
diff --git a/vllm/model_executor/models/kimi_linear.py b/vllm/model_executor/models/kimi_linear.py
index 1793397e1cc8..e36ff0227e96 100644
--- a/vllm/model_executor/models/kimi_linear.py
+++ b/vllm/model_executor/models/kimi_linear.py
@@ -393,7 +393,6 @@ def __init__(self, *, vllm_config: VllmConfig, prefix: str = ""):
         parallel_config = vllm_config.parallel_config
 
         self.config = config
-        self.padding_idx = config.pad_token_id
         self.vocab_size = config.vocab_size
 
         if get_pp_group().is_first_rank:
diff --git a/vllm/model_executor/models/longcat_flash.py b/vllm/model_executor/models/longcat_flash.py
index 32408e7c3e33..c90cc2d39a95 100644
--- a/vllm/model_executor/models/longcat_flash.py
+++ b/vllm/model_executor/models/longcat_flash.py
@@ -486,7 +486,6 @@ def __init__(self, *, vllm_config: VllmConfig, prefix: str = ""):
         quant_config = vllm_config.quant_config
 
         self.config = config
-        self.padding_idx = getattr(config, "pad_token_id", None)
         self.vocab_size = config.vocab_size
 
         if get_pp_group().is_first_rank:
diff --git a/vllm/model_executor/models/minimax_text_01.py b/vllm/model_executor/models/minimax_text_01.py
index a7785bcfc3df..80c0342ccca4 100644
--- a/vllm/model_executor/models/minimax_text_01.py
+++ b/vllm/model_executor/models/minimax_text_01.py
@@ -495,7 +495,6 @@ def __init__(self, *, vllm_config: VllmConfig, prefix: str = ""):
         cache_config = vllm_config.cache_config
         scheduler_config = vllm_config.scheduler_config
 
-        self.padding_idx = config.pad_token_id
         self.vocab_size = config.vocab_size
 
         self.decoder_attention_types = getattr(
diff --git a/vllm/model_executor/models/nemotron_nas.py b/vllm/model_executor/models/nemotron_nas.py
index 6d796a5b2708..f2f3811c0644 100644
--- a/vllm/model_executor/models/nemotron_nas.py
+++ b/vllm/model_executor/models/nemotron_nas.py
@@ -241,7 +241,6 @@ def __init__(
 
         self.config = config
         self.quant_config = quant_config
-        self.padding_idx = config.pad_token_id
         self.vocab_size = config.vocab_size
 
diff --git a/vllm/model_executor/models/openpangu.py b/vllm/model_executor/models/openpangu.py
index 04cdc5b6bb34..994ae82529ab 100644
--- a/vllm/model_executor/models/openpangu.py
+++ b/vllm/model_executor/models/openpangu.py
@@ -1029,7 +1029,6 @@ def __init__(self, *, vllm_config: VllmConfig, prefix: str = ""):
         self.config = config
         self.num_redundant_experts = eplb_config.num_redundant_experts
 
-        self.padding_idx = config.pad_token_id
         self.vocab_size = config.vocab_size
 
         if get_pp_group().is_first_rank or (
diff --git a/vllm/model_executor/models/plamo2.py b/vllm/model_executor/models/plamo2.py
index 68f0b9550dc3..f8fff2ccb403 100644
--- a/vllm/model_executor/models/plamo2.py
+++ b/vllm/model_executor/models/plamo2.py
@@ -748,7 +748,6 @@ def __init__(self, *, vllm_config: VllmConfig, prefix: str = ""):
         config = vllm_config.model_config.hf_config
 
         self.config = config
-        self.padding_idx = config.pad_token_id
         self.vocab_size = config.vocab_size
 
         self.embed_tokens = VocabParallelEmbedding(
diff --git a/vllm/model_executor/models/plamo3.py b/vllm/model_executor/models/plamo3.py
index 4ba51898d30c..1accc054156e 100644
--- a/vllm/model_executor/models/plamo3.py
+++ b/vllm/model_executor/models/plamo3.py
@@ -317,7 +317,6 @@ def __init__(self, *, vllm_config: VllmConfig, prefix: str = ""):
         config = vllm_config.model_config.hf_config
 
         self.config = config
-        self.padding_idx = config.pad_token_id
         self.vocab_size = config.vocab_size
         self.org_vocab_size = config.vocab_size
diff --git a/vllm/model_executor/models/qwen3_moe.py b/vllm/model_executor/models/qwen3_moe.py
index eba4b0f5f815..f9da9248e9d5 100644
--- a/vllm/model_executor/models/qwen3_moe.py
+++ b/vllm/model_executor/models/qwen3_moe.py
@@ -443,7 +443,6 @@ def __init__(
         eplb_config = parallel_config.eplb_config
         self.num_redundant_experts = eplb_config.num_redundant_experts
 
-        self.padding_idx = config.pad_token_id
         self.vocab_size = config.vocab_size
         self.config = config
         self.quant_config = quant_config
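
Every hunk above removes the same dead pattern: __init__ stores config.pad_token_id as self.padding_idx (with a getattr fallback in longcat_flash.py), and, as far as these hunks show, the attribute is never read again. Token embeddings in these models come from vLLM's VocabParallelEmbedding (visible in the plamo2.py context), which does not take a padding_idx argument the way torch.nn.Embedding does. A quick way to gain confidence in a cleanup like this is to scan the touched files for leftover references after applying the patch. The sketch below is illustrative only and not part of the change; the repo-relative directory path and file list are taken from the diff headers.

# Hypothetical post-patch check: flag any surviving mention of padding_idx
# in the files this diff touches. A hit would mean the attribute still has
# a reference somewhere and the removal needs a second look.
from pathlib import Path

TOUCHED = [
    "ernie45_moe.py", "ernie45_vl_moe.py", "granitemoeshared.py",
    "grok1.py", "hunyuan_v1.py", "jais2.py", "kimi_linear.py",
    "longcat_flash.py", "minimax_text_01.py", "nemotron_nas.py",
    "openpangu.py", "plamo2.py", "plamo3.py", "qwen3_moe.py",
]

models_dir = Path("vllm/model_executor/models")  # path from the diff headers
for name in TOUCHED:
    lines = (models_dir / name).read_text().splitlines()
    for lineno, line in enumerate(lines, start=1):
        if "padding_idx" in line:
            print(f"{name}:{lineno}: {line.strip()}")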