From c2a0580dec214a92d3252f26143d45d39f045ec5 Mon Sep 17 00:00:00 2001 From: McZyWu Date: Fri, 27 Mar 2026 16:08:43 +0800 Subject: [PATCH 1/2] [NPU] recover accuracy for gemma3-4b-it with transformers 5.3 --- python/sglang/srt/models/gemma3_causal.py | 18 +++++++++++++----- 1 file changed, 13 insertions(+), 5 deletions(-) diff --git a/python/sglang/srt/models/gemma3_causal.py b/python/sglang/srt/models/gemma3_causal.py index 0481cae0eeba..ede94b340640 100644 --- a/python/sglang/srt/models/gemma3_causal.py +++ b/python/sglang/srt/models/gemma3_causal.py @@ -42,9 +42,10 @@ default_weight_loader, maybe_remap_kv_scale_name, ) -from sglang.srt.utils import add_prefix, cpu_has_amx_support, is_cpu, make_layers +from sglang.srt.utils import add_prefix, cpu_has_amx_support, is_cpu, is_npu, make_layers _is_cpu = is_cpu() +_is_npu = is_npu() _is_cpu_amx_available = cpu_has_amx_support() @@ -573,10 +574,17 @@ def __init__( local_theta = getattr(config, "rope_local_base_freq", 10000.0) global_config = copy.deepcopy(config) - global_config.rope_parameters = { - "rope_type": "default", - "rope_theta": global_theta, - } + if not is_npu(): + global_config.rope_parameters = { + "rope_type": "default", + "rope_theta": global_theta, + } + else: + global_config.rope_parameters = { + "rope_theta": global_theta, + "factor": 8, + "rope_type": "linear", + } self.rotary_emb = Gemma3RotaryEmbedding(config=global_config) self.gradient_checkpointing = False From 321735dd2c8d41ba0b26e45ea0e98239de0c1d54 Mon Sep 17 00:00:00 2001 From: McZyWu Date: Fri, 27 Mar 2026 17:16:23 +0800 Subject: [PATCH 2/2] fix lint checks --- python/sglang/srt/models/gemma3_causal.py | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/python/sglang/srt/models/gemma3_causal.py b/python/sglang/srt/models/gemma3_causal.py index ede94b340640..db243ea3cd88 100644 --- a/python/sglang/srt/models/gemma3_causal.py +++ b/python/sglang/srt/models/gemma3_causal.py @@ -42,7 +42,13 @@ default_weight_loader, 
maybe_remap_kv_scale_name, ) -from sglang.srt.utils import add_prefix, cpu_has_amx_support, is_cpu, is_npu, make_layers +from sglang.srt.utils import ( + add_prefix, + cpu_has_amx_support, + is_cpu, + is_npu, + make_layers, +) _is_cpu = is_cpu() _is_npu = is_npu() @@ -574,7 +580,7 @@ def __init__( local_theta = getattr(config, "rope_local_base_freq", 10000.0) global_config = copy.deepcopy(config) - if not is_npu(): + if not _is_npu: global_config.rope_parameters = { "rope_type": "default", "rope_theta": global_theta,