From d40417e1edbbc3eecb32bfa82398daf95306647a Mon Sep 17 00:00:00 2001
From: "Wu, Chunyuan"
Date: Tue, 25 Nov 2025 10:40:43 +0000
Subject: [PATCH] call check_quantized_moe_compatibility after initialize

---
 python/sglang/srt/model_executor/model_runner.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/python/sglang/srt/model_executor/model_runner.py b/python/sglang/srt/model_executor/model_runner.py
index a0272029aa2d..a72e534da868 100644
--- a/python/sglang/srt/model_executor/model_runner.py
+++ b/python/sglang/srt/model_executor/model_runner.py
@@ -306,7 +306,6 @@ def __init__(
 
         # Model-specific adjustment
         self.model_specific_adjustment()
-        self.check_quantized_moe_compatibility()
 
         # Set the global server_args in the scheduler process
         set_global_server_args_for_scheduler(server_args)
@@ -336,6 +335,7 @@ def __init__(
 
         # Initialize the model runner
         self.initialize(min_per_gpu_memory)
+        self.check_quantized_moe_compatibility()
 
         # Temporary cached values
         self.support_pp = (