From 529f77e54aa91c124e450fcf331ea456b085d185 Mon Sep 17 00:00:00 2001 From: Tomer Natan Date: Tue, 9 Dec 2025 01:42:35 -0800 Subject: [PATCH] Fix NemotronH server args: read quantization via get_model_config() instead of self.model_config --- python/sglang/srt/server_args.py | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/python/sglang/srt/server_args.py b/python/sglang/srt/server_args.py index 68bd6709cb52..73f424246de6 100644 --- a/python/sglang/srt/server_args.py +++ b/python/sglang/srt/server_args.py @@ -1219,21 +1219,22 @@ def _handle_model_specific_adjustments(self): ) self.disable_radix_cache = True elif model_arch in ["NemotronHForCausalLM"]: - if self.model_config.quantization in [ + model_config = self.get_model_config() + if model_config.quantization in [ "modelopt", "modelopt_fp8", "modelopt_fp4", ]: - assert self.model_config.hf_config.mlp_hidden_act == "relu2" - if self.model_config.quantization == "modelopt": + assert model_config.hf_config.mlp_hidden_act == "relu2" + if model_config.quantization == "modelopt": self.quantization = ( "modelopt_fp4" - if self.model_config.hf_config.quantization_config["quant_algo"] + if model_config.hf_config.quantization_config["quant_algo"] == "NVFP4" else "modelopt_fp8" ) else: - self.quantization = self.model_config.quantization + self.quantization = model_config.quantization self.moe_runner_backend = "flashinfer_cutlass" elif model_arch in [ "Qwen3MoeForCausalLM",