From a1204f4911e23a3c05acbc6f75335bde2de24dc6 Mon Sep 17 00:00:00 2001
From: Hannu Varjoranta <hannu@varjosoft.com>
Date: Fri, 15 May 2026 23:45:18 +0300
Subject: [PATCH 1/2] [Bugfix][DeepseekV4] Guard expert loader against
 UnboundLocalError

`DeepseekV4Model.load_weights` adds `name_mapped` to `loaded_params`
unconditionally after the `for mapping in expert_mapping` loop, but
`name_mapped` is only bound inside the loop, after the
`if weight_name not in name: continue` guard:

    for mapping in expert_mapping:
        param_name, weight_name, expert_id, shard_id = mapping
        if weight_name not in name:
            continue
        name_mapped = name.replace(weight_name, param_name)
        ...
    loaded_params.add(name_mapped)   # UnboundLocalError

If a `.experts.` weight matches no entry in `expert_mapping` (the loop
body never reaches the assignment for any mapping), `name_mapped` is
never bound and `loaded_params.add(name_mapped)` raises
UnboundLocalError, aborting checkpoint load.

This is reachable with non-canonical DSV4 checkpoints whose expert
tensor names differ from the model's `expert_mapping` (e.g.
custom-quantized variants). It surfaced loading a custom 3-bit
quantized DSV4-Flash checkpoint during the safetensors load.

Fix: initialize `name_mapped = None` before the loop and skip the
weight when no mapping matched (consistent with the other
unmatched-weight branches in this method, which `continue`) instead
of raising.

The affected path is `load_weights`, which requires a fully
instantiated model + distributed init to exercise; vLLM's existing
DSV4 tests (`tests/models/test_deepseek_v4_mega_moe.py`) are
CUDA-gated and cover only pure helpers, so there is no unit-test
seam for this path. The change is a self-contained
unbound-variable guard; happy to add a CUDA integration test if
preferred.

Closes #42769

Signed-off-by: Hannu Varjoranta <hannu@varjosoft.com>
---
 vllm/model_executor/models/deepseek_v4.py | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/vllm/model_executor/models/deepseek_v4.py b/vllm/model_executor/models/deepseek_v4.py
index 3f3a2fb17026..1622ad9a6d49 100644
--- a/vllm/model_executor/models/deepseek_v4.py
+++ b/vllm/model_executor/models/deepseek_v4.py
@@ -1529,6 +1529,7 @@ def load_weights(self, weights: Iterable[tuple[str, torch.Tensor]]) -> set[str]:
                         and loaded_weight.dtype == torch.float8_e8m0fnu
                     ):
                         loaded_weight = loaded_weight.view(torch.uint8)
+                    name_mapped = None
                     for mapping in expert_mapping:
                         param_name, weight_name, expert_id, shard_id = mapping
                         if weight_name not in name:
@@ -1554,6 +1555,13 @@ def load_weights(self, weights: Iterable[tuple[str, torch.Tensor]]) -> set[str]:
                         if success:
                             name = name_mapped
                             break
+                    if name_mapped is None:
+                        # No expert mapping matched (e.g. a non-canonical
+                        # checkpoint whose expert tensor names differ from
+                        # this model's expert_mapping); nothing was loaded
+                        # for this weight, so skip it instead of raising
+                        # UnboundLocalError.
+                        continue
                     loaded_params.add(name_mapped)
                     continue
                 elif "attn_sink" in name:

From cd5cd496deb48257557bff67a1c93d98a47515f8 Mon Sep 17 00:00:00 2001
From: Hannu Varjoranta <hannu@varjosoft.com>
Date: Sun, 17 May 2026 11:46:57 +0300
Subject: [PATCH 2/2] [Bugfix][DeepseekV4] Gate expert load on success, not
 name_mapped

After the previous patch, a non-None name_mapped only shows that some
expert mapping matched this weight's name; it does not imply the weight
was loaded. When the loader returns False on this rank for every
matching mapping, name_mapped is set yet nothing was loaded. Track an
explicit success flag and skip (continue) unless a mapping actually
loaded, so loaded_params only records weights that were really applied.

Signed-off-by: Hannu Varjoranta <hannu@varjosoft.com>
---
 vllm/model_executor/models/deepseek_v4.py | 13 +++++++------
 1 file changed, 7 insertions(+), 6 deletions(-)

diff --git a/vllm/model_executor/models/deepseek_v4.py b/vllm/model_executor/models/deepseek_v4.py
index 1622ad9a6d49..64729bbd7568 100644
--- a/vllm/model_executor/models/deepseek_v4.py
+++ b/vllm/model_executor/models/deepseek_v4.py
@@ -1530,6 +1530,7 @@ def load_weights(self, weights: Iterable[tuple[str, torch.Tensor]]) -> set[str]:
                     ):
                         loaded_weight = loaded_weight.view(torch.uint8)
                     name_mapped = None
+                    success = False
                     for mapping in expert_mapping:
                         param_name, weight_name, expert_id, shard_id = mapping
                         if weight_name not in name:
@@ -1555,12 +1556,12 @@ def load_weights(self, weights: Iterable[tuple[str, torch.Tensor]]) -> set[str]:
                         if success:
                             name = name_mapped
                             break
-                    if name_mapped is None:
-                        # No expert mapping matched (e.g. a non-canonical
-                        # checkpoint whose expert tensor names differ from
-                        # this model's expert_mapping); nothing was loaded
-                        # for this weight, so skip it instead of raising
-                        # UnboundLocalError.
+                    if not success:
+                        # No expert mapping matched, or the loader did not
+                        # load this weight for the current rank (e.g. a
+                        # non-canonical checkpoint, or this rank holds no
+                        # replica). Skip it instead of marking it loaded or
+                        # raising UnboundLocalError.
                         continue
                     loaded_params.add(name_mapped)
                     continue