sgl-project · ShangmingCai · Dec 9, 2025 · Oct 22, 2025 · Dec 4, 2025 · Dec 5, 2025
diff --git a/python/sglang/srt/configs/model_config.py b/python/sglang/srt/configs/model_config.py
@@ -518,12 +518,17 @@ def _parse_quant_hf_config(self):
             # example: https://huggingface.co/Barrrrry/DeepSeek-R1-W4AFP8/tree/main
             is_local = os.path.exists(self.model_path)
             if not is_local:
-                import huggingface_hub
+                # Use the ModelScope or HF hub client; read the flag via .get().
+                if envs.SGLANG_USE_MODELSCOPE.get():
+                    from modelscope import HubApi, model_file_download
 
-                try:
+                    hf_api = HubApi()
+                else:
+                    import huggingface_hub
                     from huggingface_hub import HfApi, hf_hub_download
 
                     hf_api = HfApi()
+                try:
                     # Retry HF API call up to 3 times
                     file_exists = retry(
                         lambda: hf_api.file_exists(
@@ -535,11 +540,18 @@ def _parse_quant_hf_config(self):
                     )
                     if file_exists:
                         # Download and parse the quantization config for remote models
-                        quant_config_file = hf_hub_download(
-                            repo_id=self.model_path,
-                            filename="hf_quant_config.json",
-                            revision=self.revision,
-                        )
+                        if envs.SGLANG_USE_MODELSCOPE.get():
+                            quant_config_file = model_file_download(
+                                model_id=self.model_path,
+                                file_path="hf_quant_config.json",
+                                revision=self.revision,
+                            )
+                        else:
+                            quant_config_file = hf_hub_download(
+                                repo_id=self.model_path,
+                                filename="hf_quant_config.json",
+                                revision=self.revision,
+                            )
                         with open(quant_config_file) as f:
                             quant_config_dict = json.load(f)
                         quant_cfg = self._parse_modelopt_quant_config(quant_config_dict)

diff --git a/python/sglang/srt/utils/hf_transformers_utils.py b/python/sglang/srt/utils/hf_transformers_utils.py
@@ -24,11 +24,18 @@
 
 import torch
 from huggingface_hub import snapshot_download
+
+from sglang.srt.utils import get_bool_env_var
+
+# Conditional import based on SGLANG_USE_MODELSCOPE environment variable
+if get_bool_env_var("SGLANG_USE_MODELSCOPE"):
+    from modelscope import AutoConfig, GenerationConfig
+else:
+    from transformers import AutoConfig, GenerationConfig
+
 from transformers import (
-    AutoConfig,
     AutoProcessor,
     AutoTokenizer,
-    GenerationConfig,
     PretrainedConfig,
     PreTrainedTokenizer,
     PreTrainedTokenizerBase,