From 9576aac7d9e6ba0bb4601d31ab89435292f60101 Mon Sep 17 00:00:00 2001 From: "wang.yuqi" Date: Sun, 4 Jan 2026 11:36:04 +0800 Subject: [PATCH 1/5] init Signed-off-by: wang.yuqi --- vllm/config/vllm.py | 6 ----- vllm/model_executor/models/config.py | 35 +++++++++++++++++----------- 2 files changed, 21 insertions(+), 20 deletions(-) diff --git a/vllm/config/vllm.py b/vllm/config/vllm.py index e321254f21ab..30a24233575f 100644 --- a/vllm/config/vllm.py +++ b/vllm/config/vllm.py @@ -1264,12 +1264,6 @@ def _set_compile_ranges(self): computed_compile_ranges_split_points ) - def recalculate_max_model_len(self, max_model_len: int): - # Can only be called in try_verify_and_update_config - model_config = self.model_config - max_model_len = model_config.get_and_verify_max_len(max_model_len) - self.model_config.max_model_len = max_model_len - def try_verify_and_update_config(self): if self.model_config is None: return diff --git a/vllm/model_executor/models/config.py b/vllm/model_executor/models/config.py index 10fd599f9e5f..5bb180ed80ea 100644 --- a/vllm/model_executor/models/config.py +++ b/vllm/model_executor/models/config.py @@ -113,8 +113,8 @@ def verify_and_update_model_config(model_config: "ModelConfig") -> None: class NomicBertModelConfig(VerifyAndUpdateConfig): @staticmethod - def verify_and_update_config(vllm_config: "VllmConfig") -> None: - config = vllm_config.model_config.hf_config + def verify_and_update_model_config(model_config: "ModelConfig") -> None: + config = model_config.hf_config assert config.__class__.__name__ == "NomicBertConfig" assert config.activation_function in ["swiglu", "gelu"] @@ -153,42 +153,41 @@ def verify_and_update_config(vllm_config: "VllmConfig") -> None: # The context extension uses vllm style rope_theta and rope_parameters. # See #17785 #18755 if ( - not vllm_config.model_config.hf_overrides - and vllm_config.model_config.original_max_model_len is None + not model_config.hf_overrides + and model_config.original_max_model_len is None ): # Default # Reset max_model_len to max_trained_positions. # nomic-embed-text-v2-moe the length is set to 512 # by sentence_bert_config.json. - max_model_len_before = vllm_config.model_config.max_model_len - max_model_len = min( - vllm_config.model_config.max_model_len, max_trained_positions - ) + max_model_len_before = model_config.max_model_len + max_model_len = min(model_config.max_model_len, max_trained_positions) - vllm_config.recalculate_max_model_len(max_model_len) + model_config.max_model_len = model_config.get_and_verify_max_len( + max_model_len + ) logger.warning( "Nomic context extension is disabled. " "Changing max_model_len from %s to %s. " "To enable context extension, see: " "https://github.com/vllm-project/vllm/tree/main/examples/offline_inference/context_extension.html", max_model_len_before, - vllm_config.model_config.max_model_len, + model_config.max_model_len, ) else: # We need to re-verify max_model_len to avoid lengths # greater than position_embedding. - model_config = vllm_config.model_config hf_text_config = model_config.hf_text_config if isinstance(model_config.hf_overrides, dict): # hf_overrides_kw max_model_len = model_config.hf_overrides.get( - "max_model_len", vllm_config.model_config.max_model_len + "max_model_len", model_config.max_model_len ) else: # hf_overrides_fn # This might be overridden by sentence_bert_config.json. - max_model_len = vllm_config.model_config.max_model_len + max_model_len = model_config.max_model_len # reset hf_text_config for recalculate_max_model_len. if hasattr(hf_text_config, "max_model_len"): @@ -196,13 +195,21 @@ def verify_and_update_config(vllm_config: "VllmConfig") -> None: hf_text_config.max_position_embeddings = max_trained_positions hf_text_config.rope_parameters = config.rotary_kwargs["rope_parameters"] + # Update the cached derived_max_model_len to enforce the limit + model_config.model_arch_config.derived_max_model_len_and_key = ( + float(max_trained_positions), + "max_position_embeddings", + ) + # The priority of sentence_bert_config.json is higher # than max_position_embeddings encoder_config = deepcopy(model_config.encoder_config) encoder_config.pop("max_seq_length", None) model_config.encoder_config = encoder_config - vllm_config.recalculate_max_model_len(max_model_len) + model_config.max_model_len = model_config.get_and_verify_max_len( + max_model_len + ) class Qwen2ForProcessRewardModelConfig(VerifyAndUpdateConfig): From d9e644fccbea077ed6dadf87c9e0153e9afdd10f Mon Sep 17 00:00:00 2001 From: "wang.yuqi" Date: Sun, 4 Jan 2026 11:51:15 +0800 Subject: [PATCH 2/5] update Signed-off-by: wang.yuqi --- vllm/model_executor/models/config.py | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) diff --git a/vllm/model_executor/models/config.py b/vllm/model_executor/models/config.py index 5bb180ed80ea..a5adc8738358 100644 --- a/vllm/model_executor/models/config.py +++ b/vllm/model_executor/models/config.py @@ -166,14 +166,16 @@ def verify_and_update_model_config(model_config: "ModelConfig") -> None: model_config.max_model_len = model_config.get_and_verify_max_len( max_model_len ) - logger.warning( - "Nomic context extension is disabled. " - "Changing max_model_len from %s to %s. " - "To enable context extension, see: " - "https://github.com/vllm-project/vllm/tree/main/examples/offline_inference/context_extension.html", - max_model_len_before, - model_config.max_model_len, - ) + + if model_config.max_model_len != max_model_len_before: + logger.warning( + "Nomic context extension is disabled. " + "Changing max_model_len from %s to %s. " + "To enable context extension, see: " + "https://github.com/vllm-project/vllm/tree/main/examples/offline_inference/context_extension.html", + max_model_len_before, + model_config.max_model_len, + ) else: # We need to re-verify max_model_len to avoid lengths # greater than position_embedding. From c10151a63f286c7702660d1a36a137fa2d533dcb Mon Sep 17 00:00:00 2001 From: "wang.yuqi" Date: Sun, 4 Jan 2026 15:00:41 +0800 Subject: [PATCH 3/5] update Signed-off-by: wang.yuqi --- vllm/model_executor/models/config.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/vllm/model_executor/models/config.py b/vllm/model_executor/models/config.py index a5adc8738358..562dc4288987 100644 --- a/vllm/model_executor/models/config.py +++ b/vllm/model_executor/models/config.py @@ -137,6 +137,8 @@ def verify_and_update_model_config(model_config: "ModelConfig") -> None: config.intermediate_size = config.n_inner config.hidden_size = config.n_embd config.num_hidden_layers = config.n_layer + model_config.model_arch_config.hidden_size = config.hidden_size + model_config.model_arch_config.num_hidden_layers = config.num_hidden_layers head_dim = config.hidden_size // config.num_attention_heads max_trained_positions = getattr(config, "max_trained_positions", 2048) From e23ced4ca28e765f9a370c3eac9b17fd1e9edceb Mon Sep 17 00:00:00 2001 From: "wang.yuqi" Date: Sun, 4 Jan 2026 15:03:18 +0800 Subject: [PATCH 4/5] update Signed-off-by: wang.yuqi --- vllm/model_executor/models/config.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/vllm/model_executor/models/config.py b/vllm/model_executor/models/config.py index 562dc4288987..7dd0bb227603 100644 --- a/vllm/model_executor/models/config.py +++ b/vllm/model_executor/models/config.py @@ -138,7 +138,7 @@ def verify_and_update_model_config(model_config: "ModelConfig") -> None: config.hidden_size = config.n_embd config.num_hidden_layers = config.n_layer model_config.model_arch_config.hidden_size = config.hidden_size - model_config.model_arch_config.num_hidden_layers = config.num_hidden_layers + model_config.model_arch_config.total_num_hidden_layers = config.num_hidden_layers head_dim = config.hidden_size // config.num_attention_heads max_trained_positions = getattr(config, "max_trained_positions", 2048) From ee49a9f54dbf0b02e83878349e655e87c58b106a Mon Sep 17 00:00:00 2001 From: "wang.yuqi" Date: Sun, 4 Jan 2026 15:13:51 +0800 Subject: [PATCH 5/5] update Signed-off-by: wang.yuqi --- vllm/model_executor/models/config.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/vllm/model_executor/models/config.py b/vllm/model_executor/models/config.py index 7dd0bb227603..fbda821492f7 100644 --- a/vllm/model_executor/models/config.py +++ b/vllm/model_executor/models/config.py @@ -138,7 +138,9 @@ def verify_and_update_model_config(model_config: "ModelConfig") -> None: config.hidden_size = config.n_embd config.num_hidden_layers = config.n_layer model_config.model_arch_config.hidden_size = config.hidden_size - model_config.model_arch_config.total_num_hidden_layers = config.num_hidden_layers + model_config.model_arch_config.total_num_hidden_layers = ( + config.num_hidden_layers + ) head_dim = config.hidden_size // config.num_attention_heads max_trained_positions = getattr(config, "max_trained_positions", 2048)