sgl-project · yankay · Dec 2, 2025
@@ -70,7 +70,7 @@ jobs:
         env:
           SGLANG_USE_MODELSCOPE: true
           SGLANG_IS_IN_CI: true
-          HF_ENDPOINT: https://hf-mirror.com
+          HF_ENDPOINT: https://127.0.0.1 # Deliberately unreachable Hugging Face endpoint; SGLANG_USE_MODELSCOPE is enabled above
           TORCH_EXTENSIONS_DIR: /tmp/torch_extensions
           PYTORCH_NPU_ALLOC_CONF: "expandable_segments:True"
           STREAMS_PER_DEVICE: 32

diff --git a/python/sglang/srt/configs/model_config.py b/python/sglang/srt/configs/model_config.py
@@ -25,7 +25,7 @@
 from sglang.srt.environ import envs
 from sglang.srt.layers.quantization import QUANTIZATION_METHODS
 from sglang.srt.server_args import ServerArgs
-from sglang.srt.utils import is_hip, retry
+from sglang.srt.utils import get_bool_env_var, is_hip, retry
 from sglang.srt.utils.hf_transformers_utils import (
     get_config,
     get_context_length,
@@ -521,9 +521,14 @@ def _parse_quant_hf_config(self):
                 import huggingface_hub
 
                 try:
-                    from huggingface_hub import HfApi, hf_hub_download
 
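+                    # Pick the download helper based on SGLANG_USE_MODELSCOPE. NOTE(review): the ModelScope branch does not import HfApi, yet HfApi() is called right after this if/else - confirm it is in scope.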
+                    if get_bool_env_var("SGLANG_USE_MODELSCOPE"):
+                        from modelscope import hf_hub_download
+                    else:
+                        from huggingface_hub import HfApi, hf_hub_download
                     hf_api = HfApi()
+
                     # Retry HF API call up to 3 times
                     file_exists = retry(
                         lambda: hf_api.file_exists(

diff --git a/python/sglang/srt/utils/hf_transformers_utils.py b/python/sglang/srt/utils/hf_transformers_utils.py
@@ -23,12 +23,23 @@
 from typing import Any, Dict, List, Optional, Type, Union
 
 import torch
-from huggingface_hub import snapshot_download
+
+from sglang.srt.utils import get_bool_env_var
+
+# Choose the provider at import time: modelscope when SGLANG_USE_MODELSCOPE is enabled, otherwise huggingface_hub + transformers
+if get_bool_env_var("SGLANG_USE_MODELSCOPE"):
+    from modelscope import (
+        AutoConfig,
+        AutoProcessor,
+        AutoTokenizer,
+        GenerationConfig,
+        snapshot_download,
+    )
+else:
+    from huggingface_hub import snapshot_download
+    from transformers import AutoConfig, AutoProcessor, AutoTokenizer, GenerationConfig
+
 from transformers import (
-    AutoConfig,
-    AutoProcessor,
-    AutoTokenizer,
-    GenerationConfig,
     PretrainedConfig,
     PreTrainedTokenizer,
     PreTrainedTokenizerBase,