diff --git a/vllm/model_executor/models/gemma4.py b/vllm/model_executor/models/gemma4.py index f5528f5c34e9..4a1e891dc28b 100644 --- a/vllm/model_executor/models/gemma4.py +++ b/vllm/model_executor/models/gemma4.py @@ -1490,6 +1490,11 @@ def load_weights(self, weights: Iterable[tuple[str, torch.Tensor]]) -> set[str]: quant_params = router_quant_params[router_name] if len(quant_params) == 3: weight_name = f"{router_name}.weight" + if is_pp_missing_parameter(weight_name, self): + del router_quant_params[router_name] + continue + if weight_name not in params_dict: + raise KeyError(weight_name) param = params_dict[weight_name] weight_loader = getattr( param, "weight_loader", default_weight_loader diff --git a/vllm/tokenizers/registry.py b/vllm/tokenizers/registry.py index 3241aa1abbd0..9d1d812c2d19 100644 --- a/vllm/tokenizers/registry.py +++ b/vllm/tokenizers/registry.py @@ -1,6 +1,7 @@ # SPDX-License-Identifier: Apache-2.0 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project import contextlib +import copy from dataclasses import dataclass, field from functools import lru_cache from pathlib import Path @@ -104,6 +105,7 @@ def _maybe_patch_gemma4_gguf_tokenizer( if not special_ids: return tokenizer + patched_tokenizer = copy.copy(tokenizer) token_attrs = { "padding_token_id": "pad_token", "bos_token_id": "bos_token", @@ -114,12 +116,12 @@ def _maybe_patch_gemma4_gguf_tokenizer( token_id = special_ids.get(id_attr) if token_id is None: continue - token = tokenizer.convert_ids_to_tokens(token_id) + token = patched_tokenizer.convert_ids_to_tokens(token_id) if token is None: continue - setattr(tokenizer, token_attr, token) + setattr(patched_tokenizer, token_attr, token) - return tokenizer + return patched_tokenizer def resolve_tokenizer_args( diff --git a/vllm/transformers_utils/processor.py b/vllm/transformers_utils/processor.py index 3b5397113997..b4b5d30f65ac 100644 --- a/vllm/transformers_utils/processor.py +++ b/vllm/transformers_utils/processor.py @@ -1,6 +1,7 @@ # SPDX-License-Identifier: Apache-2.0 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project +import copy import importlib import inspect from functools import lru_cache @@ -367,7 +368,9 @@ def cached_processor_from_config( model_config.model, getattr(model_config.hf_config, "model_type", None), ) - processor.tokenizer = tokenizer + if tokenizer is not processor.tokenizer: + processor = copy.copy(processor) + processor.tokenizer = tokenizer return processor