diff --git a/python/sglang/srt/model_executor/model_runner.py b/python/sglang/srt/model_executor/model_runner.py index dbbf6e66dd64..81d2dd86bb4e 100644 --- a/python/sglang/srt/model_executor/model_runner.py +++ b/python/sglang/srt/model_executor/model_runner.py @@ -323,7 +323,6 @@ def __init__( # Model-specific adjustment self.model_specific_adjustment() - self.check_quantized_moe_compatibility() # Set the global server_args in the scheduler process set_global_server_args_for_scheduler(server_args) @@ -355,6 +354,7 @@ def __init__( # Initialize the model runner self.initialize(min_per_gpu_memory) + self.check_quantized_moe_compatibility() # Temporary cached values self.support_pp = (