vllm-project · varjoranta · May 22, 2026
@@ -1431,6 +1431,8 @@ def load_weights(self, weights: Iterable[tuple[str, torch.Tensor]]) -> set[str]:
                         and loaded_weight.dtype == torch.float8_e8m0fnu
                     ):
                         loaded_weight = loaded_weight.view(torch.uint8)
+                    name_mapped = None
+                    success = False
                     for mapping in expert_mapping:
                         param_name, weight_name, expert_id, expert_shard_id = mapping
                         if weight_name not in name:
@@ -1456,6 +1458,13 @@ def load_weights(self, weights: Iterable[tuple[str, torch.Tensor]]) -> set[str]:
                         if success:
                             name = name_mapped
                             break
+                    if not success:
+                        # No expert mapping matched, or the loader did not
+                        # load this weight for the current rank (e.g. a
+                        # non-canonical checkpoint, or this rank holds no
+                        # replica). Skip it instead of marking it loaded or
+                        # raising UnboundLocalError.
+                        continue
                     loaded_params.add(name_mapped)
                     continue
                 elif "attn_sink" in name: