From a1204f4911e23a3c05acbc6f75335bde2de24dc6 Mon Sep 17 00:00:00 2001 From: Hannu Varjoranta Date: Fri, 15 May 2026 23:45:18 +0300 Subject: [PATCH 1/2] [Bugfix][DeepseekV4] Guard expert loader against UnboundLocalError `DeepseekV4Model.load_weights` adds `name_mapped` to `loaded_params` unconditionally after the `for mapping in expert_mapping` loop, but `name_mapped` is only bound inside the loop, after the `if weight_name not in name: continue` guard: for mapping in expert_mapping: param_name, weight_name, expert_id, shard_id = mapping if weight_name not in name: continue name_mapped = name.replace(weight_name, param_name) ... loaded_params.add(name_mapped) # UnboundLocalError If a `.experts.` weight matches no entry in `expert_mapping` (the loop body never reaches the assignment for any mapping), `name_mapped` is never bound and `loaded_params.add(name_mapped)` raises UnboundLocalError, aborting the checkpoint load. This is reachable with non-canonical DSV4 checkpoints whose expert tensor names differ from the model's `expert_mapping` (e.g. custom-quantized variants). It surfaced during the safetensors load of a custom 3-bit quantized DSV4-Flash checkpoint. Fix: initialize `name_mapped = None` before the loop and skip the weight when no mapping matched (consistent with the other unmatched-weight branches in this method, which `continue`) instead of raising. The affected path is `load_weights`, which requires a fully instantiated model + distributed init to exercise; vLLM's existing DSV4 tests (`tests/models/test_deepseek_v4_mega_moe.py`) are CUDA-gated and cover only pure helpers, so there is no unit-test seam for this path. The change is a self-contained unbound-variable guard; happy to add a CUDA integration test if preferred. 
Closes #42769 Signed-off-by: Hannu Varjoranta --- vllm/model_executor/models/deepseek_v4.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/vllm/model_executor/models/deepseek_v4.py b/vllm/model_executor/models/deepseek_v4.py index 3f3a2fb17026..1622ad9a6d49 100644 --- a/vllm/model_executor/models/deepseek_v4.py +++ b/vllm/model_executor/models/deepseek_v4.py @@ -1529,6 +1529,7 @@ def load_weights(self, weights: Iterable[tuple[str, torch.Tensor]]) -> set[str]: and loaded_weight.dtype == torch.float8_e8m0fnu ): loaded_weight = loaded_weight.view(torch.uint8) + name_mapped = None for mapping in expert_mapping: param_name, weight_name, expert_id, shard_id = mapping if weight_name not in name: @@ -1554,6 +1555,13 @@ def load_weights(self, weights: Iterable[tuple[str, torch.Tensor]]) -> set[str]: if success: name = name_mapped break + if name_mapped is None: + # No expert mapping matched (e.g. a non-canonical + # checkpoint whose expert tensor names differ from + # this model's expert_mapping); nothing was loaded + # for this weight, so skip it instead of raising + # UnboundLocalError. + continue loaded_params.add(name_mapped) continue elif "attn_sink" in name: From cd5cd496deb48257557bff67a1c93d98a47515f8 Mon Sep 17 00:00:00 2001 From: Hannu Varjoranta Date: Sun, 17 May 2026 11:46:57 +0300 Subject: [PATCH 2/2] [Bugfix][DeepseekV4] Gate expert load on success, not name_mapped A non-None name_mapped no longer implies the weight was loaded: when every expert mapping is attempted but the loader returns False for this rank, name_mapped is set yet nothing was loaded. Track an explicit success flag and skip (continue) unless a mapping actually loaded, so loaded_params only records weights that were really applied. 
Signed-off-by: Hannu Varjoranta --- vllm/model_executor/models/deepseek_v4.py | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/vllm/model_executor/models/deepseek_v4.py b/vllm/model_executor/models/deepseek_v4.py index 1622ad9a6d49..64729bbd7568 100644 --- a/vllm/model_executor/models/deepseek_v4.py +++ b/vllm/model_executor/models/deepseek_v4.py @@ -1530,6 +1530,7 @@ def load_weights(self, weights: Iterable[tuple[str, torch.Tensor]]) -> set[str]: ): loaded_weight = loaded_weight.view(torch.uint8) name_mapped = None + success = False for mapping in expert_mapping: param_name, weight_name, expert_id, shard_id = mapping if weight_name not in name: @@ -1555,12 +1556,12 @@ def load_weights(self, weights: Iterable[tuple[str, torch.Tensor]]) -> set[str]: if success: name = name_mapped break - if name_mapped is None: - # No expert mapping matched (e.g. a non-canonical - # checkpoint whose expert tensor names differ from - # this model's expert_mapping); nothing was loaded - # for this weight, so skip it instead of raising - # UnboundLocalError. + if not success: + # No expert mapping matched, or the loader did not + # load this weight for the current rank (e.g. a + # non-canonical checkpoint, or this rank holds no + # replica). Skip it instead of marking it loaded or + # raising UnboundLocalError. continue loaded_params.add(name_mapped) continue