diff --git a/unsloth/models/_utils.py b/unsloth/models/_utils.py
index 6f6d693b4f..0d0f90cf40 100644
--- a/unsloth/models/_utils.py
+++ b/unsloth/models/_utils.py
@@ -72,6 +72,7 @@
     "patch_hf_quantizer",
     "verify_fp8_support_if_applicable",
     "_get_inference_mode_context_manager",
+    "hf_login",
 ]
 
 import torch
@@ -2344,3 +2345,26 @@ def _get_inference_mode_context_manager(model: torch.nn.Module):
         return torch.no_grad()
     else:
         return torch.inference_mode()
+
+
+def hf_login(token: Optional[str] = None) -> Optional[str]:
+    """Resolve a Hugging Face token and log in so private models can be accessed."""
+    if token is None:
+        # Fall back to the token cached by `huggingface-cli login` or the HF_TOKEN env var.
+        try:
+            from huggingface_hub import get_token
+
+            token = get_token()
+            if token is None:
+                return None
+        except Exception:
+            return None
+    try:
+        from huggingface_hub import login
+
+        login(token = token)
+        return token
+    except Exception as e:
+        # Login failure is non-fatal; return the token so callers can still pass it on.
+        logger.info(f"Failed to log in to Hugging Face using token with error: {e}")
+        return token
diff --git a/unsloth/models/llama.py b/unsloth/models/llama.py
index 4c9337ccf9..1d7695b9aa 100644
--- a/unsloth/models/llama.py
+++ b/unsloth/models/llama.py
@@ -2130,8 +2130,7 @@ def from_pretrained(
                 "Unsloth: `unsloth_vllm_standby` is True, but environment variable `UNSLOTH_VLLM_STANDBY` is not set to 1!"
             )
 
-        if token is None:
-            token = get_token()
+        token = hf_login(token)
         if model_patcher is None:
             model_patcher = FastLlamaModel
         SUPPORTS_BFLOAT16 = is_bfloat16_supported()
diff --git a/unsloth/models/loader.py b/unsloth/models/loader.py
index bfa94d86d7..b13775076c 100644
--- a/unsloth/models/loader.py
+++ b/unsloth/models/loader.py
@@ -20,6 +20,7 @@
     HAS_FLASH_ATTENTION_SOFTCAPPING,
     USE_MODELSCOPE,
     get_transformers_model_type,
+    hf_login,
 )
 from .granite import FastGraniteModel
 from .llama import FastLlamaModel, logger
@@ -151,15 +152,7 @@ def from_pretrained(
         **kwargs,
     ):
         # Login to allow private models
-        if token is None:
-            token = get_token()
-        if token is not None:
-            try:
-                from huggingface_hub import login
-
-                login(token = token)
-            except:
-                pass
+        token = hf_login(token)
         if load_in_8bit or full_finetuning or qat_scheme is not None:
             return FastModel.from_pretrained(
                 model_name = model_name,
@@ -195,8 +188,6 @@
             **kwargs,
         )
 
-        if token is None:
-            token = get_token()
         if isinstance(dtype, str) and dtype in ["float16", "bfloat16"]:
             dtype = getattr(torch, dtype)
         assert (
@@ -682,16 +673,8 @@ def from_pretrained(
         *args,
         **kwargs,
     ):
-        if token is None:
-            token = get_token()
         # Login to allow private models
-        if token is not None:
-            try:
-                from huggingface_hub import login
-
-                login(token = token)
-            except:
-                pass
+        token = hf_login(token)
         if whisper_language is not None:
             assert type(whisper_language) is str
         if whisper_task is not None:
diff --git a/unsloth/models/vision.py b/unsloth/models/vision.py
index ed19f587cf..a10d65f3fb 100644
--- a/unsloth/models/vision.py
+++ b/unsloth/models/vision.py
@@ -390,8 +390,7 @@ def from_pretrained(
                 "Unsloth: WARNING `trust_remote_code` is True.\n"
                 "Are you certain you want to do remote code execution?"
            )
-        if token is None:
-            token = get_token()
+        token = hf_login(token)
         SUPPORTS_BFLOAT16 = is_bfloat16_supported()
 
         if DEVICE_TYPE == "cuda":
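
A minimal usage sketch of the consolidated helper (illustrative, not part of the patch). It assumes an unsloth build containing this change and a huggingface_hub version recent enough to provide get_token; the model name below is only a placeholder.

    from unsloth.models._utils import hf_login
    from unsloth import FastLanguageModel

    # hf_login resolves the token (explicit argument, else HF_TOKEN / the
    # cached CLI login), calls huggingface_hub.login(), and returns the
    # token, or None when no token could be found.
    token = hf_login()
    if token is None:
        print("No Hugging Face token available; private models will not load.")

    # from_pretrained now performs the same resolution internally via
    # hf_login(token), so passing token explicitly is optional.
    model, tokenizer = FastLanguageModel.from_pretrained(
        model_name = "unsloth/Llama-3.2-1B-Instruct",
        token = token,
    )

The design point of the patch is that the four call sites previously duplicated the get_token-then-login dance (each with its own silent `except`); hf_login centralizes it and keeps login failures non-fatal.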