Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 4 additions & 1 deletion studio/backend/core/training/trainer.py
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,9 @@
import pandas as pd
from datasets import Dataset, load_dataset

from core.inference.llama_cpp import _hf_offline_if_dns_dead
from utils.models import is_vision_model, detect_audio_type
from utils.models.model_config import _env_offline
from utils.datasets import format_and_template_dataset
from utils.datasets import MODEL_TO_TEMPLATE_MAPPER, TEMPLATE_TO_RESPONSES_MAPPER
from utils.datasets.raw_text import prepare_raw_text_dataset
Expand Down Expand Up @@ -617,7 +619,8 @@ def load_model(

# Proactive gated-model check: verify access BEFORE from_pretrained.
# Catches ALL gated/private models (text, vision, audio) globally.
if "/" in model_name: # Only check HF repo IDs, not local paths
# Skip when offline -- from_pretrained will use the cache.
if "/" in model_name and not _env_offline():
try:
from huggingface_hub import model_info as hf_model_info

Expand Down
30 changes: 30 additions & 0 deletions studio/backend/core/training/worker.py
Original file line number Diff line number Diff line change
Expand Up @@ -1025,6 +1025,36 @@ def run_training_process(
"ignore" # Suppress warnings at C-level before imports
)

# Offline auto-detect: skip ~25s of HF retries per call when DNS is
# dead. Scoped to this subprocess (orchestrator spawns a fresh one).
if "HF_HUB_OFFLINE" not in os.environ:

Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

P2 Badge Run DNS probe when HF_HUB_OFFLINE is falsy

This guard treats any presence of HF_HUB_OFFLINE as an opt-out, so common explicit defaults like HF_HUB_OFFLINE=0 skip the auto-detect path entirely. In that configuration, a dead resolver no longer flips the worker to offline mode and training falls back to slow Hugging Face/network timeouts, which defeats the latency regression fix this block is meant to provide for cached models.

Useful? React with 👍 / 👎.

import socket as _socket
import threading as _threading

# Daemon thread so we don't mutate process-wide setdefaulttimeout.
_result: list = [None]

def _probe() -> None:
try:
_socket.gethostbyname("huggingface.co")
_result[0] = False
except Exception:
_result[0] = True

_t = _threading.Thread(target = _probe, daemon = True)
_t.start()
_t.join(2.0)
if _result[0] is None or _result[0] is True:
os.environ["HF_HUB_OFFLINE"] = "1"
os.environ.setdefault("TRANSFORMERS_OFFLINE", "1")
os.environ.setdefault("HF_DATASETS_OFFLINE", "1")
# logger isn't configured yet; print to stderr instead.
print(
"huggingface.co unreachable; HF_HUB_OFFLINE=1 set for this worker.",
file = sys.stderr,
flush = True,
)

import warnings
from loggers.config import LogConfig

Expand Down
18 changes: 11 additions & 7 deletions studio/backend/routes/inference.py
Original file line number Diff line number Diff line change
Expand Up @@ -117,6 +117,7 @@ def _friendly_error(exc: Exception) -> str:
LlamaCppBackend,
_DEFAULT_MAX_TOKENS_FLOOR,
_DEFAULT_T_MAX_PREDICT_MS,
_hf_offline_if_dns_dead,
detect_reasoning_flags,
)
from core.inference.llama_server_args import (
Expand All @@ -142,6 +143,7 @@ def _friendly_error(exc: Exception) -> str:
LlamaCppBackend,
_DEFAULT_MAX_TOKENS_FLOOR,
_DEFAULT_T_MAX_PREDICT_MS,
_hf_offline_if_dns_dead,
detect_reasoning_flags,
)
from core.inference.llama_server_args import (
Expand Down Expand Up @@ -643,13 +645,15 @@ async def load_model(
chat_template = _chat_template,
)

# Create config using clean factory method
# is_lora is auto-detected from adapter_config.json on disk/HF
config = ModelConfig.from_identifier(
model_id = model_identifier,
hf_token = request.hf_token,
gguf_variant = request.gguf_variant,
)
# is_lora auto-detected from adapter_config.json on disk/HF.
# DNS-probe wrap so offline loads skip 30-60s of soft-failed
# network checks before the worker starts.
with _hf_offline_if_dns_dead():
config = ModelConfig.from_identifier(
model_id = model_identifier,
hf_token = request.hf_token,
gguf_variant = request.gguf_variant,
)

if not config:
raise HTTPException(
Expand Down
236 changes: 236 additions & 0 deletions studio/backend/tests/test_offline_inference_parent.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,236 @@
# SPDX-License-Identifier: AGPL-3.0-only
# Copyright 2026-present the Unsloth AI Inc. team. All rights reserved. See /studio/LICENSE.AGPL-3.0

"""Parent-process offline regression tests (follow-up to #5505).

Pins the LoRA-detect, transformers_version urllib short-circuit, and
training-worker DNS probe so a dead DNS no longer burns 30-60s of
soft-failed timeouts before the worker subprocess spawns.

No GPU, no network, no subprocess. Cross-platform.
"""

from __future__ import annotations

import os
import sys
import types as _types
from pathlib import Path
from unittest.mock import patch

import pytest


# Backend root is two directories above this test file; put it at the front
# of sys.path so the ``utils.*`` imports further down resolve.
_BACKEND_DIR = str(Path(__file__).resolve().parent.parent)
if _BACKEND_DIR not in sys.path:
    sys.path.insert(0, _BACKEND_DIR)


def _stub_get_logger(name):
    """Stand-in for ``loggers.get_logger``: return the stdlib logger *name*."""
    return __import__("logging").getLogger(name)


def _build_httpx_stub():
    """Build a minimal fake ``httpx`` module.

    The fake exposes the exception classes, ``Response``/``Request``
    placeholders, a ``Timeout`` that accepts any arguments, and a ``Client``
    usable as a context manager.
    """
    stub = _types.ModuleType("httpx")

    exception_names = (
        "ConnectError",
        "TimeoutException",
        "ReadTimeout",
        "ReadError",
        "RemoteProtocolError",
        "CloseError",
        "HTTPError",
        "RequestError",
        "HTTPStatusError",
    )
    for exc_name in exception_names:
        # Each name becomes its own plain Exception subclass.
        setattr(stub, exc_name, type(exc_name, (Exception,), {}))

    for placeholder in ("Response", "Request"):
        setattr(stub, placeholder, type(placeholder, (), {}))

    def _timeout_init(self, *a, **k):
        return None

    def _client_init(self, **k):
        return None

    def _client_enter(self):
        return self

    def _client_exit(self, *a):
        return None

    stub.Timeout = type("_FakeTimeout", (), {"__init__": _timeout_init})
    stub.Client = type(
        "Client",
        (),
        {
            "__init__": _client_init,
            "__enter__": _client_enter,
            "__exit__": _client_exit,
        },
    )
    return stub


# setdefault leaves any module already registered under these names alone.
_loggers_stub = _types.ModuleType("loggers")
_loggers_stub.get_logger = _stub_get_logger
sys.modules.setdefault("loggers", _loggers_stub)
sys.modules.setdefault("structlog", _types.ModuleType("structlog"))

# Prefer real httpx if installed (CI installs it). Stub only as fallback.
try:
    import httpx  # noqa: F401
except ImportError:
    _hx = _build_httpx_stub()
    sys.modules.setdefault("httpx", _hx)


from utils.models.model_config import _env_offline
from utils.transformers_version import (
_check_config_needs_550,
_check_tokenizer_config_needs_v5,
_env_offline as _env_offline_tv,
)


@pytest.fixture
def clean_offline_env(monkeypatch):
    """Remove the offline-mode env vars so each test starts from 'unset'.

    ``raising = False`` makes the removal a no-op when a variable is absent.
    """
    for var_name in ("HF_HUB_OFFLINE", "TRANSFORMERS_OFFLINE"):
        monkeypatch.delenv(var_name, raising = False)


class TestEnvOffline:
    """Pin how the two ``_env_offline`` helpers (``utils.models.model_config``
    and ``utils.transformers_version``) read the offline env vars."""

    def test_unset_is_false(self, clean_offline_env):
        """With neither variable set, both helpers report 'not offline'."""
        for is_offline in (_env_offline, _env_offline_tv):
            assert is_offline() is False

    def test_hf_hub_offline_truthy_values(self, monkeypatch, clean_offline_env):
        """Truthy spellings of HF_HUB_OFFLINE are accepted in any case."""
        truthy_spellings = ("1", "true", "yes", "TRUE", "Yes")
        for spelling in truthy_spellings:
            monkeypatch.setenv("HF_HUB_OFFLINE", spelling)
            assert _env_offline() is True
            assert _env_offline_tv() is True

    def test_transformers_offline_alone_triggers(self, monkeypatch, clean_offline_env):
        """TRANSFORMERS_OFFLINE on its own is enough to count as offline."""
        monkeypatch.setenv("TRANSFORMERS_OFFLINE", "1")
        assert _env_offline() is True

    def test_falsy_values(self, monkeypatch, clean_offline_env):
        """Empty and explicit 'off' spellings must not count as offline."""
        falsy_spellings = ("", "0", "false", "no")
        for spelling in falsy_spellings:
            monkeypatch.setenv("HF_HUB_OFFLINE", spelling)
            assert _env_offline() is False


class TestTransformersVersionOfflineShortCircuits:
    """With HF_HUB_OFFLINE set, the transformers-version checks must answer
    ``False`` for an uncached repo without touching ``urllib``."""

    @staticmethod
    def _urlopen_forbidden(*a, **k):
        """Replacement for ``urlopen`` that fails the test if it is reached."""
        raise AssertionError("urlopen must not be called when offline")

    def test_tokenizer_config_skips_urllib_when_offline(
        self,
        monkeypatch,
        clean_offline_env,
        tmp_path,
    ):
        # No local config + offline env -> must NOT call urlopen.
        monkeypatch.setenv("HF_HUB_OFFLINE", "1")
        # tmp_path.name differs per test, giving a repo id that is not cached.
        repo_id = f"unsloth/never-cached-{tmp_path.name}"

        with patch("urllib.request.urlopen", self._urlopen_forbidden):
            needs_v5 = _check_tokenizer_config_needs_v5(repo_id)
            assert needs_v5 is False

    def test_config_550_skips_urllib_when_offline(
        self,
        monkeypatch,
        clean_offline_env,
        tmp_path,
    ):
        # Same contract as above, for the config-based 5.5.0 check.
        monkeypatch.setenv("HF_HUB_OFFLINE", "1")
        repo_id = f"unsloth/never-cached-{tmp_path.name}-cfg"

        with patch("urllib.request.urlopen", self._urlopen_forbidden):
            needs_550 = _check_config_needs_550(repo_id)
            assert needs_550 is False


class TestLoraDetectOffline:
    """Offline LoRA detect: hf_model_info short-circuits via
    OfflineModeIsEnabled; cached adapter_config.json wins."""

    def test_hf_model_info_short_circuits_with_OfflineModeIsEnabled(
        self,
        monkeypatch,
        clean_offline_env,
    ):
        """``from_identifier`` must still call ``huggingface_hub.model_info``
        when HF_HUB_OFFLINE=1, even though that call raises immediately."""
        from unittest.mock import MagicMock

        from utils.models.model_config import ModelConfig

        monkeypatch.setenv("HF_HUB_OFFLINE", "1")

        # Studio catches Exception broadly; pin that the call still happens
        # (so cached LoRAs aren't missed) and returns fast via mock.
        class _OfflineModeIsEnabled(Exception):
            pass

        fake_model_info = MagicMock(side_effect = _OfflineModeIsEnabled("offline"))
        identifier_kwargs = {
            "model_id": "unsloth/Qwen3.5-4B",
            "hf_token": None,
            "gguf_variant": None,
        }
        with patch("huggingface_hub.model_info", fake_model_info):
            try:
                ModelConfig.from_identifier(**identifier_kwargs)
            except Exception:
                pass  # registry miss OK; pinning the LoRA-detect call

        assert fake_model_info.call_count >= 1, (
            "LoRA-detect must still consult hf_model_info offline; "
            "OfflineModeIsEnabled makes it cheap"
        )

    def test_cached_lora_detected_when_api_unreachable(
        self,
        monkeypatch,
        clean_offline_env,
        tmp_path,
    ):
        """A cached adapter_config.json must still mark the repo as a
        LoRA when the HF API is unreachable."""
        from huggingface_hub import constants as hf_constants

        from utils.models.model_config import ModelConfig

        # Fake hub cache layout: <cache>/models--org--name/snapshots/<40-char rev>/
        snapshot_dir = tmp_path / "models--org--my-lora" / "snapshots" / ("a" * 40)
        snapshot_dir.mkdir(parents = True)
        adapter_file = snapshot_dir / "adapter_config.json"
        adapter_file.write_text(
            '{"base_model_name_or_path": "unsloth/Llama-3-8B"}'
        )
        monkeypatch.setattr(hf_constants, "HF_HUB_CACHE", str(tmp_path))
        monkeypatch.setenv("HF_HUB_OFFLINE", "1")

        def hub_down(*a, **k):
            raise OSError("hub unreachable")

        cfg = None
        with patch("huggingface_hub.model_info", hub_down):
            try:
                cfg = ModelConfig.from_identifier(
                    model_id = "org/my-lora",
                    hf_token = None,
                    gguf_variant = None,
                )
            except Exception:
                cfg = None

        # cfg may be None (base not resolvable offline); pin the fixture
        # so the cache-side detect block had a file to find.
        # NOTE(review): nothing is asserted about cfg itself, so this does not
        # yet prove the LoRA was detected -- consider tightening once the
        # offline result of from_identifier is settled.
        assert adapter_file.is_file()


class TestTrainingWorkerProbeNoGlobalTimeout:
    """Training-worker DNS probe must run on a daemon thread, not mutate
    process-wide socket.setdefaulttimeout (mirrors llama_cpp.py)."""

    def test_training_worker_source_uses_thread_probe(self):
        """Static-pin against regression to setdefaulttimeout.

        Reads ``core/training/worker.py`` as text, isolates the offline
        auto-detect block (from the ``HF_HUB_OFFLINE`` guard through the
        ``print(...HF_HUB_OFFLINE=1...)`` call) and checks that the block
        uses a thread and never calls ``setdefaulttimeout``.
        """
        import re

        worker_path = Path(_BACKEND_DIR, "core", "training", "worker.py")
        # Decode explicitly as UTF-8: the locale default (e.g. cp1252 on
        # Windows) could fail or mis-decode a UTF-8 source file.
        src = worker_path.read_text(encoding = "utf-8")

        # Non-greedy DOTALL match from the guard to the first matching print.
        m = re.search(
            r'if\s+"HF_HUB_OFFLINE"\s+not\s+in\s+os\.environ\s*:.*?'
            r"print\([^)]*HF_HUB_OFFLINE=1[^)]*\)",
            src,
            flags = re.DOTALL,
        )
        assert m is not None, "could not locate offline auto-detect block"
        block = m.group(0)
        assert ".setdefaulttimeout(" not in block, (
            "training worker still calls socket.setdefaulttimeout; "
            "concurrent sockets would inherit the probe timeout"
        )
        assert (
            "threading" in block and "Thread" in block
        ), "training worker probe must run on a daemon thread"
Loading
Loading