unslothai · danielhanchen · May 18, 2026 · May 17, 2026 · May 17, 2026 · May 17, 2026
diff --git a/studio/backend/core/inference/llama_cpp.py b/studio/backend/core/inference/llama_cpp.py
@@ -101,6 +101,46 @@
 _SWA_CACHE_LOCK = threading.Lock()
 
 
+def _probe_dns_dead(host: str = "huggingface.co", timeout: float = 2.0) -> bool:
+    """Quick blocking DNS check. Restores any prior default timeout."""
+    prev = socket.getdefaulttimeout()
+    socket.setdefaulttimeout(timeout)
+    try:
+        try:
+            socket.gethostbyname(host)
+            return False
+        except Exception:
+            return True
+    finally:
+        socket.setdefaulttimeout(prev)
+
+
+@contextlib.contextmanager
+def _hf_offline_if_dns_dead():
+    """Set HF_HUB_OFFLINE for the body of this block only when DNS to
+    huggingface.co fails. Restores the env on exit so a transient
+    resolver hiccup at the start of one load can't quarantine the whole
+    process. Respects an explicit user setting (no-op if already set)."""
+    if "HF_HUB_OFFLINE" in os.environ:
+        yield False
+        return
+    if not _probe_dns_dead():
+        yield False
+        return
+
+    transformers_was_set = "TRANSFORMERS_OFFLINE" in os.environ
+    os.environ["HF_HUB_OFFLINE"] = "1"
+    if not transformers_was_set:
+        os.environ["TRANSFORMERS_OFFLINE"] = "1"
+    logger.warning("huggingface.co unreachable; using local HF cache for this load.")
+    try:
+        yield True
+    finally:
+        os.environ.pop("HF_HUB_OFFLINE", None)
+        if not transformers_was_set:
+            os.environ.pop("TRANSFORMERS_OFFLINE", None)
+
+
 def _swa_cache_path() -> Path:
     home = os.environ.get("UNSLOTH_STUDIO_HOME") or os.environ.get("STUDIO_HOME")
     base = Path(home) if home else Path.home() / ".unsloth" / "studio"
@@ -1804,15 +1844,62 @@ def _download_gguf(
             except Exception as e:
                 logger.warning(f"Could not list repo files: {e}")
 
+            # Offline: resolve variant -> filename from the local HF cache.
+            # The heuristic below assumes filenames echo the repo name,
+            # which breaks for e.g. Qwen3.6-27B-MTP-GGUF (no "MTP" in file).
+            # Match against the rel path (not just basename) so subdir
+            # layouts like ``BF16/foo.gguf`` are findable.
+            if not gguf_filename:
+                try:
+                    from utils.models.model_config import _iter_hf_cache_snapshots
+
+                    boundary = re.compile(
+                        r"(?<![a-zA-Z0-9])"
+                        + re.escape(hf_variant.lower())
+                        + r"(?![a-zA-Z0-9])"
+                    )
+                    for snap in _iter_hf_cache_snapshots(hf_repo):
+                        matches = sorted(
+                            p.relative_to(snap).as_posix()
+                            for p in snap.rglob("*.gguf")
+                            if "mmproj" not in p.name.lower()
+                            and boundary.search(p.relative_to(snap).as_posix().lower())
+                        )
+                        if not matches:
+                            continue
+                        gguf_filename = matches[0]
+                        m = _SHARD_FULL_RE.match(Path(gguf_filename).name)
+                        if m:
+                            prefix = m.group(1)
+                            total = m.group(3)
+                            sibling_pat = re.compile(
+                                r"^"
+                                + re.escape(prefix)
+                                + r"-\d{5}-of-"
+                                + re.escape(total)
+                                + r"\.gguf$"
+                            )
+                            gguf_extra_shards = [
+                                f
+                                for f in matches[1:]
+                                if sibling_pat.match(Path(f).name)
+                            ]
+                        logger.info(
+                            "Resolved variant %s -> %s from local HF cache",
+                            hf_variant,
+                            gguf_filename,
+                        )
+                        break
+                except Exception as e:
+                    logger.debug(f"Offline cache lookup for variant failed: {e}")
+
             if not gguf_filename:
                 repo_name = hf_repo.split("/")[-1].replace("-GGUF", "")
                 gguf_filename = f"{repo_name}-{hf_variant}.gguf"
 
         # Check disk space and fall back to a smaller variant if needed
         all_gguf_files = [gguf_filename] + gguf_extra_shards
         try:
-            import os
-
             from huggingface_hub import get_paths_info, try_to_load_from_cache
 
             path_infos = list(get_paths_info(hf_repo, all_gguf_files, token = hf_token))
@@ -1946,24 +2033,50 @@ def _download_mmproj(
         Prefers mmproj-F16.gguf, falls back to any mmproj*.gguf file.
         Returns the local path, or None if no mmproj file exists.
         """
-        try:
-            from huggingface_hub import hf_hub_download, list_repo_files
 
-            files = list_repo_files(hf_repo, token = hf_token)
+        def _pick_mmproj(candidates: list[str]) -> Optional[str]:
             mmproj_files = sorted(
-                f for f in files if f.endswith(".gguf") and "mmproj" in f.lower()
+                f
+                for f in candidates
+                if f.lower().endswith(".gguf") and "mmproj" in Path(f).name.lower()
             )
             if not mmproj_files:
                 return None
-
-            # Prefer F16 variant
-            target = None
             for f in mmproj_files:
                 if f.lower().endswith("-f16.gguf"):
-                    target = f
-                    break
-            if target is None:
-                target = mmproj_files[0]
+                    return f
+            return mmproj_files[0]
+
+        target: Optional[str] = None
+        try:
+            from huggingface_hub import list_repo_files
+
+            target = _pick_mmproj(list_repo_files(hf_repo, token = hf_token))
+        except Exception as e:
+            logger.debug(f"Could not list repo files for mmproj: {e}")
+
+        # Offline: resolve mmproj from the local HF cache snapshot, same
+        # shape as _download_gguf's offline fallback above.
+        if target is None:
+            try:
+                from utils.models.model_config import _iter_hf_cache_snapshots
+
+                for snap in _iter_hf_cache_snapshots(hf_repo):
+                    rel_files = [
+                        p.relative_to(snap).as_posix() for p in snap.rglob("*.gguf")
+                    ]
+                    target = _pick_mmproj(rel_files)
+                    if target is not None:
+                        logger.info("Resolved mmproj %s from local HF cache", target)
+                        break
+            except Exception as e:
+                logger.debug(f"Offline cache lookup for mmproj failed: {e}")
+
+        if target is None:
+            return None
+
+        try:
+            from huggingface_hub import hf_hub_download
 
             logger.info(f"Downloading mmproj: {hf_repo}/{target}")
             local_path = hf_hub_download(
@@ -2052,18 +2165,22 @@ def load_model(
                 )
 
             # ── Phase 2: download (NO lock held, so cancel can proceed) ──
+            # Scope HF_HUB_OFFLINE to the download block only when DNS is
+            # dead; cleanup runs even on exception so a transient hiccup
+            # at the start of one load cannot quarantine future loads.
             if hf_repo:
-                model_path = self._download_gguf(
-                    hf_repo = hf_repo,
-                    hf_variant = hf_variant,
-                    hf_token = hf_token,
-                )
-                # Auto-download mmproj for vision models
-                if is_vision and not mmproj_path:
-                    mmproj_path = self._download_mmproj(
+                with _hf_offline_if_dns_dead():
+                    model_path = self._download_gguf(
                         hf_repo = hf_repo,
+                        hf_variant = hf_variant,
                         hf_token = hf_token,
                     )
+                    # Auto-download mmproj for vision models
+                    if is_vision and not mmproj_path:
+                        mmproj_path = self._download_mmproj(
+                            hf_repo = hf_repo,
+                            hf_token = hf_token,
+                        )
             elif gguf_path:
                 if not Path(gguf_path).is_file():
                     raise FileNotFoundError(f"GGUF file not found: {gguf_path}")

diff --git a/studio/backend/core/inference/worker.py b/studio/backend/core/inference/worker.py
@@ -648,6 +648,27 @@ def run_inference_process(
         os.environ["HF_HUB_DISABLE_XET"] = "1"
         logger.info("Xet transport disabled (HF_HUB_DISABLE_XET=1)")
 
+    # Offline auto-detect: skip 25s of hf_hub_download retries per file
+    # if DNS is dead; cached files resolve instantly under HF_HUB_OFFLINE=1.
+    # Scope is this subprocess only -- orchestrator spawns a fresh worker
+    # per load (see core/inference/orchestrator.py), so the env cannot
+    # persist across loads.
+    if "HF_HUB_OFFLINE" not in os.environ:
+        import socket as _socket
+
+        prev_timeout = _socket.getdefaulttimeout()
+        _socket.setdefaulttimeout(2.0)
+        try:
+            _socket.gethostbyname("huggingface.co")
+        except Exception:
+            os.environ["HF_HUB_OFFLINE"] = "1"
+            os.environ.setdefault("TRANSFORMERS_OFFLINE", "1")
+            logger.warning(
+                "huggingface.co unreachable; HF_HUB_OFFLINE=1 set for this worker."
+            )
+        finally:
+            _socket.setdefaulttimeout(prev_timeout)
+
     import warnings
     from loggers.config import LogConfig