unslothai · danielhanchen · Dec 20, 2025 · Dec 9, 2025 · Dec 9, 2025 · Dec 9, 2025
@@ -72,8 +72,6 @@ def filter(self, x):
 
 
 class HidePrintMessage:
-    __slots__ = ("_original_stream", "_hidden_texts")
-
     def __init__(self, original_stream):
         self._original_stream = original_stream
         self._hidden_texts = []

@@ -72,6 +72,7 @@
     "patch_hf_quantizer",
     "verify_fp8_support_if_applicable",
     "_get_inference_mode_context_manager",
+    "hf_login",
 ]
 
 import torch
@@ -2344,3 +2345,23 @@ def _get_inference_mode_context_manager(model: torch.nn.Module):
         return torch.no_grad()
     else:
         return torch.inference_mode()
+
+
+def hf_login(token: Optional[str] = None) -> Optional[str]:
+    """Log in to the Hugging Face Hub and return the token that was used.
+
+    Args:
+        token: Access token to log in with. When ``None``, the token is
+            looked up with ``huggingface_hub.get_token()``.
+
+    Returns:
+        The supplied or looked-up token, or ``None`` when no token could be
+        obtained. The token is still returned when the login call raises;
+        that failure is only logged.
+    """
+    if token is None:
+        try:
+            from huggingface_hub import get_token
+
+            token = get_token()
+        except Exception:
+            # Best effort: the lookup failed, so there is nothing to log in with.
+            return None
+        if token is None:
+            return None
+    try:
+        from huggingface_hub import login
+
+        login(token = token)
+    except Exception as e:
+        # A failed login is not fatal here; record it and fall through.
+        logger.info(f"Failed to login to huggingface using token with error: {e}")
+    return token
@@ -2130,8 +2130,7 @@ def from_pretrained(
                     "Unsloth: `unsloth_vllm_standby` is True, but  environment variable `UNSLOTH_VLLM_STANDBY` is not set to 1!"
                 )
 
-        if token is None:
-            token = get_token()
+        token = hf_login(token)
         if model_patcher is None:
             model_patcher = FastLlamaModel
         SUPPORTS_BFLOAT16 = is_bfloat16_supported()

@@ -20,6 +20,7 @@
     HAS_FLASH_ATTENTION_SOFTCAPPING,
     USE_MODELSCOPE,
     get_transformers_model_type,
+    hf_login,
 )
 from .granite import FastGraniteModel
 from .llama import FastLlamaModel, logger
@@ -151,15 +152,7 @@ def from_pretrained(
         **kwargs,
     ):
         # Login to allow private models
-        if token is None:
-            token = get_token()
-        if token is not None:
-            try:
-                from huggingface_hub import login
-
-                login(token = token)
-            except:
-                pass
+        token = hf_login(token)
         if load_in_8bit or full_finetuning or qat_scheme is not None:
             return FastModel.from_pretrained(
                 model_name = model_name,
@@ -195,8 +188,6 @@ def from_pretrained(
                 **kwargs,
             )
 
-        if token is None:
-            token = get_token()
         if isinstance(dtype, str) and dtype in ["float16", "bfloat16"]:
             dtype = getattr(torch, dtype)
         assert (
@@ -687,16 +678,8 @@ def from_pretrained(
         *args,
         **kwargs,
     ):
-        if token is None:
-            token = get_token()
         # Login to allow private models
-        if token is not None:
-            try:
-                from huggingface_hub import login
-
-                login(token = token)
-            except:
-                pass
+        token = hf_login(token)
         if whisper_language is not None:
             assert type(whisper_language) is str
         if whisper_task is not None:

@@ -397,8 +397,7 @@ def from_pretrained(
                 "Unsloth: WARNING `trust_remote_code` is True.\n"
                 "Are you certain you want to do remote code execution?"
             )
-        if token is None:
-            token = get_token()
+        token = hf_login(token)
         SUPPORTS_BFLOAT16 = is_bfloat16_supported()
 
         if DEVICE_TYPE == "cuda":