diff --git a/python/sglang/srt/configs/model_config.py b/python/sglang/srt/configs/model_config.py
index 26dfbe5eb1d5..8fb7c24a76ff 100644
--- a/python/sglang/srt/configs/model_config.py
+++ b/python/sglang/srt/configs/model_config.py
@@ -518,12 +518,17 @@ def _parse_quant_hf_config(self):
         # example: https://huggingface.co/Barrrrry/DeepSeek-R1-W4AFP8/tree/main
         is_local = os.path.exists(self.model_path)
         if not is_local:
-            import huggingface_hub
+            # Conditional import based on SGLANG_USE_MODELSCOPE environment variable
+            if envs.SGLANG_USE_MODELSCOPE.get():
+                from modelscope import HubApi, model_file_download
 
-            try:
+                hf_api = HubApi()
+            else:
+                import huggingface_hub
                 from huggingface_hub import HfApi, hf_hub_download
 
                 hf_api = HfApi()
+            try:
                 # Retry HF API call up to 3 times
                 file_exists = retry(
                     lambda: hf_api.file_exists(
@@ -535,11 +540,18 @@ def _parse_quant_hf_config(self):
                 )
                 if file_exists:
                     # Download and parse the quantization config for remote models
-                    quant_config_file = hf_hub_download(
-                        repo_id=self.model_path,
-                        filename="hf_quant_config.json",
-                        revision=self.revision,
-                    )
+                    if envs.SGLANG_USE_MODELSCOPE.get():
+                        quant_config_file = model_file_download(
+                            model_id=self.model_path,
+                            file_path="hf_quant_config.json",
+                            revision=self.revision,
+                        )
+                    else:
+                        quant_config_file = hf_hub_download(
+                            repo_id=self.model_path,
+                            filename="hf_quant_config.json",
+                            revision=self.revision,
+                        )
                     with open(quant_config_file) as f:
                         quant_config_dict = json.load(f)
                     quant_cfg = self._parse_modelopt_quant_config(quant_config_dict)
diff --git a/python/sglang/srt/utils/hf_transformers_utils.py b/python/sglang/srt/utils/hf_transformers_utils.py
index 0e71dfb31383..53f7d575dde5 100644
--- a/python/sglang/srt/utils/hf_transformers_utils.py
+++ b/python/sglang/srt/utils/hf_transformers_utils.py
@@ -24,11 +24,18 @@
 import torch
 from huggingface_hub import snapshot_download
+
+from sglang.srt.utils import get_bool_env_var
+
+# Conditional import based on SGLANG_USE_MODELSCOPE environment variable
+if get_bool_env_var("SGLANG_USE_MODELSCOPE"):
+    from modelscope import AutoConfig, GenerationConfig
+else:
+    from transformers import AutoConfig, GenerationConfig
+
 from transformers import (
-    AutoConfig,
     AutoProcessor,
     AutoTokenizer,
-    GenerationConfig,
     PretrainedConfig,
     PreTrainedTokenizer,
     PreTrainedTokenizerBase,