From 974151e5db389a9c76e3e080b9d5fcca8327ae3e Mon Sep 17 00:00:00 2001 From: Kay Yan Date: Tue, 2 Dec 2025 10:34:41 +0000 Subject: [PATCH] fix: prevent HuggingFace access when SGLANG_USE_MODELSCOPE is enabled Signed-off-by: Kay Yan --- .github/workflows/pr-test-npu.yml | 2 +- python/sglang/srt/configs/model_config.py | 9 ++++++-- .../sglang/srt/utils/hf_transformers_utils.py | 21 ++++++++++++++----- 3 files changed, 24 insertions(+), 8 deletions(-) diff --git a/.github/workflows/pr-test-npu.yml b/.github/workflows/pr-test-npu.yml index af19e7549531..3e8d7c3e8d4f 100644 --- a/.github/workflows/pr-test-npu.yml +++ b/.github/workflows/pr-test-npu.yml @@ -70,7 +70,7 @@ jobs: env: SGLANG_USE_MODELSCOPE: true SGLANG_IS_IN_CI: true - HF_ENDPOINT: https://hf-mirror.com + HF_ENDPOINT: https://127.0.0.1 # Set to an inaccessible Huggingface endpoint TORCH_EXTENSIONS_DIR: /tmp/torch_extensions PYTORCH_NPU_ALLOC_CONF: "expandable_segments:True" STREAMS_PER_DEVICE: 32 diff --git a/python/sglang/srt/configs/model_config.py b/python/sglang/srt/configs/model_config.py index 26dfbe5eb1d5..ef76ba350fd9 100644 --- a/python/sglang/srt/configs/model_config.py +++ b/python/sglang/srt/configs/model_config.py @@ -25,7 +25,7 @@ from sglang.srt.environ import envs from sglang.srt.layers.quantization import QUANTIZATION_METHODS from sglang.srt.server_args import ServerArgs -from sglang.srt.utils import is_hip, retry +from sglang.srt.utils import get_bool_env_var, is_hip, retry from sglang.srt.utils.hf_transformers_utils import ( get_config, get_context_length, @@ -521,9 +521,14 @@ def _parse_quant_hf_config(self): import huggingface_hub try: - from huggingface_hub import HfApi, hf_hub_download + # Conditional import based on SGLANG_USE_MODELSCOPE environment variable + if get_bool_env_var("SGLANG_USE_MODELSCOPE"): + from modelscope import hf_hub_download + else: + from huggingface_hub import HfApi, hf_hub_download hf_api = HfApi() + # Retry HF API call up to 3 times file_exists = retry( lambda: hf_api.file_exists( diff --git a/python/sglang/srt/utils/hf_transformers_utils.py b/python/sglang/srt/utils/hf_transformers_utils.py index 0e71dfb31383..335267d9f747 100644 --- a/python/sglang/srt/utils/hf_transformers_utils.py +++ b/python/sglang/srt/utils/hf_transformers_utils.py @@ -23,12 +23,23 @@ from typing import Any, Dict, List, Optional, Type, Union import torch -from huggingface_hub import snapshot_download + +from sglang.srt.utils import get_bool_env_var + +# Conditional import based on SGLANG_USE_MODELSCOPE environment variable +if get_bool_env_var("SGLANG_USE_MODELSCOPE"): + from modelscope import ( + AutoConfig, + AutoProcessor, + AutoTokenizer, + GenerationConfig, + snapshot_download, + ) +else: + from huggingface_hub import snapshot_download + from transformers import AutoConfig, AutoProcessor, AutoTokenizer, GenerationConfig + from transformers import ( - AutoConfig, - AutoProcessor, - AutoTokenizer, - GenerationConfig, PretrainedConfig, PreTrainedTokenizer, PreTrainedTokenizerBase,