Skip to content
21 changes: 21 additions & 0 deletions unsloth/models/_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -72,6 +72,7 @@
"patch_hf_quantizer",
"verify_fp8_support_if_applicable",
"_get_inference_mode_context_manager",
"hf_login",
]

import torch
Expand Down Expand Up @@ -2344,3 +2345,23 @@ def _get_inference_mode_context_manager(model: torch.nn.Module):
return torch.no_grad()
else:
return torch.inference_mode()


def hf_login(token: Optional[str] = None) -> Optional[str]:
    """Best-effort login to the Hugging Face Hub.

    If ``token`` is None, tries to read the locally cached token via
    ``huggingface_hub.get_token``; returns None when no token can be
    resolved. Otherwise calls ``huggingface_hub.login`` with the token.
    Login failures are logged but never raised, so callers can still
    proceed and pass the token through to downstream loaders.

    Args:
        token: An explicit Hugging Face access token, or None to fall
            back to the cached one.

    Returns:
        The token that was used (or attempted) for login, or None when
        no token is available.
    """
    if token is None:
        try:
            from huggingface_hub import get_token

            token = get_token()
        # `except Exception` (not bare `except`) so KeyboardInterrupt /
        # SystemExit still propagate; a missing huggingface_hub or a
        # failed cache lookup simply means there is nothing to log in with.
        except Exception:
            return None
        if token is None:
            return None
    try:
        from huggingface_hub import login

        login(token = token)
    except Exception as e:
        # Best-effort: a failed login must not abort model loading.
        logger.info(f"Failed to login to huggingface using token with error: {e}")
    return token
3 changes: 1 addition & 2 deletions unsloth/models/llama.py
Original file line number Diff line number Diff line change
Expand Up @@ -2130,8 +2130,7 @@ def from_pretrained(
"Unsloth: `unsloth_vllm_standby` is True, but environment variable `UNSLOTH_VLLM_STANDBY` is not set to 1!"
)

if token is None:
token = get_token()
token = hf_login(token)
if model_patcher is None:
model_patcher = FastLlamaModel
SUPPORTS_BFLOAT16 = is_bfloat16_supported()
Expand Down
23 changes: 3 additions & 20 deletions unsloth/models/loader.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
HAS_FLASH_ATTENTION_SOFTCAPPING,
USE_MODELSCOPE,
get_transformers_model_type,
hf_login,
)
from .granite import FastGraniteModel
from .llama import FastLlamaModel, logger
Expand Down Expand Up @@ -151,15 +152,7 @@ def from_pretrained(
**kwargs,
):
# Login to allow private models
if token is None:
token = get_token()
if token is not None:
try:
from huggingface_hub import login

login(token = token)
except:
pass
token = hf_login(token)
if load_in_8bit or full_finetuning or qat_scheme is not None:
return FastModel.from_pretrained(
model_name = model_name,
Expand Down Expand Up @@ -195,8 +188,6 @@ def from_pretrained(
**kwargs,
)

if token is None:
token = get_token()
if isinstance(dtype, str) and dtype in ["float16", "bfloat16"]:
dtype = getattr(torch, dtype)
assert (
Expand Down Expand Up @@ -682,16 +673,8 @@ def from_pretrained(
*args,
**kwargs,
):
if token is None:
token = get_token()
# Login to allow private models
if token is not None:
try:
from huggingface_hub import login

login(token = token)
except:
pass
token = hf_login(token)
if whisper_language is not None:
assert type(whisper_language) is str
if whisper_task is not None:
Expand Down
3 changes: 1 addition & 2 deletions unsloth/models/vision.py
Original file line number Diff line number Diff line change
Expand Up @@ -390,8 +390,7 @@ def from_pretrained(
"Unsloth: WARNING `trust_remote_code` is True.\n"
"Are you certain you want to do remote code execution?"
)
if token is None:
token = get_token()
token = hf_login(token)
SUPPORTS_BFLOAT16 = is_bfloat16_supported()

if DEVICE_TYPE == "cuda":
Expand Down