Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
18 commits
Select commit Hold shift + click to select a range
970219a
split venv_t5 into venv_t5_530 and venv_t5_550 for tiered transformer…
rolandtannous Apr 6, 2026
e8328c1
fix bfloat16 crash on T4 for FORCE_FLOAT32 models and disable trust_r…
rolandtannous Apr 6, 2026
efd1191
revert FORCE_FLOAT32 dtype change
rolandtannous Apr 6, 2026
3400654
restrict trust_remote_code auto-enable to Nemotron models only
rolandtannous Apr 6, 2026
41b103a
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Apr 6, 2026
b03e07e
Merge branch 'main' into fix/transformers-v5-tiers
rolandtannous Apr 6, 2026
fc49ae2
use config.json model_type for tier detection, add unsloth/nvidia nam…
rolandtannous Apr 6, 2026
fb43d46
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Apr 6, 2026
81d2581
Revert "[pre-commit.ci] auto fixes from pre-commit.com hooks"
rolandtannous Apr 6, 2026
4df9e68
Revert "use config.json model_type for tier detection, add unsloth/nv…
rolandtannous Apr 6, 2026
96dd82f
add unsloth/nvidia namespace guard to Nemotron trust_remote_code auto…
rolandtannous Apr 6, 2026
35226e6
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Apr 6, 2026
82dbf70
reorder tier checks: all substring matches before config.json fetches
rolandtannous Apr 6, 2026
30bc604
extract shared activate_transformers_for_subprocess into transformers…
rolandtannous Apr 6, 2026
18db38b
narrow Nemotron trust_remote_code to nemotron_h/nemotron-3-nano, add …
rolandtannous Apr 6, 2026
1cc14f4
clean venv_t5 dirs before re-install in setup.sh, clarify version ali…
rolandtannous Apr 6, 2026
5445fa6
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Apr 6, 2026
45e3a02
run venv_t5 migration outside deps fast-path gate in both setup scripts
rolandtannous Apr 7, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
41 changes: 16 additions & 25 deletions studio/backend/core/export/worker.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,37 +30,15 @@


def _activate_transformers_version(model_name: str) -> None:
"""Activate the correct transformers version BEFORE any ML imports.

If the model needs transformers 5.x, prepend the pre-installed .venv_t5/
directory to sys.path. Otherwise do nothing (default 4.57.x in .venv/).
"""
"""Activate the correct transformers version BEFORE any ML imports."""
# Ensure backend is on path for utils imports
backend_path = str(Path(__file__).resolve().parent.parent.parent)
if backend_path not in sys.path:
sys.path.insert(0, backend_path)

from utils.transformers_version import (
needs_transformers_5,
_resolve_base_model,
_ensure_venv_t5_exists,
_VENV_T5_DIR,
)
from utils.transformers_version import activate_transformers_for_subprocess

resolved = _resolve_base_model(model_name)
if needs_transformers_5(resolved):
if not _ensure_venv_t5_exists():
raise RuntimeError(
f"Cannot activate transformers 5.x: .venv_t5 missing at {_VENV_T5_DIR}"
)
if _VENV_T5_DIR not in sys.path:
sys.path.insert(0, _VENV_T5_DIR)
logger.info("Activated transformers 5.x from %s", _VENV_T5_DIR)
# Propagate to child subprocesses (e.g. GGUF converter)
_pp = os.environ.get("PYTHONPATH", "")
os.environ["PYTHONPATH"] = _VENV_T5_DIR + (os.pathsep + _pp if _pp else "")
else:
logger.info("Using default transformers (4.57.x) for %s", model_name)
activate_transformers_for_subprocess(model_name)


def _send_response(resp_queue: Any, response: dict) -> None:
Expand All @@ -78,6 +56,19 @@ def _handle_load(backend, cmd: dict, resp_queue: Any) -> None:
load_in_4bit = cmd.get("load_in_4bit", True)
trust_remote_code = cmd.get("trust_remote_code", False)

# Auto-enable trust_remote_code for NemotronH/Nano models.
if not trust_remote_code:
_NEMOTRON_TRUST_SUBSTRINGS = ("nemotron_h", "nemotron-h", "nemotron-3-nano")
_cp_lower = checkpoint_path.lower()
if any(sub in _cp_lower for sub in _NEMOTRON_TRUST_SUBSTRINGS) and (
_cp_lower.startswith("unsloth/") or _cp_lower.startswith("nvidia/")
):
trust_remote_code = True
logger.info(
"Auto-enabled trust_remote_code for Nemotron model: %s",
checkpoint_path,
)

try:
_send_response(
resp_queue,
Expand Down
44 changes: 12 additions & 32 deletions studio/backend/core/inference/worker.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,37 +34,15 @@


def _activate_transformers_version(model_name: str) -> None:
"""Activate the correct transformers version BEFORE any ML imports.

If the model needs transformers 5.x, prepend the pre-installed .venv_t5/
directory to sys.path. Otherwise do nothing (default 4.57.x in .venv/).
"""
"""Activate the correct transformers version BEFORE any ML imports."""
# Ensure backend is on path for utils imports
backend_path = str(Path(__file__).resolve().parent.parent.parent)
if backend_path not in sys.path:
sys.path.insert(0, backend_path)

from utils.transformers_version import (
needs_transformers_5,
_resolve_base_model,
_ensure_venv_t5_exists,
_VENV_T5_DIR,
)
from utils.transformers_version import activate_transformers_for_subprocess

resolved = _resolve_base_model(model_name)
if needs_transformers_5(resolved):
if not _ensure_venv_t5_exists():
raise RuntimeError(
f"Cannot activate transformers 5.x: .venv_t5 missing at {_VENV_T5_DIR}"
)
if _VENV_T5_DIR not in sys.path:
sys.path.insert(0, _VENV_T5_DIR)
logger.info("Activated transformers 5.x from %s", _VENV_T5_DIR)
# Propagate to child subprocesses (e.g. GGUF converter)
_pp = os.environ.get("PYTHONPATH", "")
os.environ["PYTHONPATH"] = _VENV_T5_DIR + (os.pathsep + _pp if _pp else "")
else:
logger.info("Using default transformers (4.57.x) for %s", model_name)
activate_transformers_for_subprocess(model_name)


def _decode_image(image_base64: str):
Expand Down Expand Up @@ -309,19 +287,21 @@ def _handle_load(backend, config: dict, resp_queue: Any) -> None:
except Exception as e:
logger.warning("Could not read adapter_config.json: %s", e)

# Auto-enable trust_remote_code for unsloth/* transformers 5.x models
# (matches the training worker logic in core/training/worker.py)
# Auto-enable trust_remote_code for NemotronH/Nano models only.
# NemotronH has config parsing bugs requiring trust_remote_code=True.
# Other transformers 5.x models are native and do NOT need it.
# NOTE: Must NOT match Llama-Nemotron (standard Llama architecture).
_NEMOTRON_TRUST_SUBSTRINGS = ("nemotron_h", "nemotron-h", "nemotron-3-nano")
trust_remote_code = config.get("trust_remote_code", False)
if not trust_remote_code:
from utils.transformers_version import needs_transformers_5

model_name = config["model_name"]
if needs_transformers_5(model_name) and model_name.lower().startswith(
"unsloth/"
_mn_lower = model_name.lower()
if any(sub in _mn_lower for sub in _NEMOTRON_TRUST_SUBSTRINGS) and (
_mn_lower.startswith("unsloth/") or _mn_lower.startswith("nvidia/")
):
trust_remote_code = True
logger.info(
"Auto-enabled trust_remote_code for unsloth/* transformers 5.x model: %s",
"Auto-enabled trust_remote_code for Nemotron model: %s",
model_name,
)

Expand Down
51 changes: 13 additions & 38 deletions studio/backend/core/training/worker.py
Original file line number Diff line number Diff line change
Expand Up @@ -306,37 +306,15 @@ def _ensure_mamba_ssm(event_queue: Any, model_name: str) -> None:


def _activate_transformers_version(model_name: str) -> None:
"""Activate the correct transformers version BEFORE any ML imports.

If the model needs transformers 5.x, prepend the pre-installed .venv_t5/
directory to sys.path. Otherwise do nothing (default 4.57.x in .venv/).
"""
"""Activate the correct transformers version BEFORE any ML imports."""
# Ensure backend is on path for utils imports
backend_path = str(Path(__file__).resolve().parent.parent.parent)
if backend_path not in sys.path:
sys.path.insert(0, backend_path)

from utils.transformers_version import (
needs_transformers_5,
_resolve_base_model,
_ensure_venv_t5_exists,
_VENV_T5_DIR,
)
from utils.transformers_version import activate_transformers_for_subprocess

resolved = _resolve_base_model(model_name)
if needs_transformers_5(resolved):
if not _ensure_venv_t5_exists():
raise RuntimeError(
f"Cannot activate transformers 5.x: .venv_t5 missing at {_VENV_T5_DIR}"
)
if _VENV_T5_DIR not in sys.path:
sys.path.insert(0, _VENV_T5_DIR)
logger.info("Activated transformers 5.x from %s", _VENV_T5_DIR)
# Propagate to child subprocesses (e.g. GGUF converter)
_pp = os.environ.get("PYTHONPATH", "")
os.environ["PYTHONPATH"] = _VENV_T5_DIR + (os.pathsep + _pp if _pp else "")
else:
logger.info("Using default transformers (4.57.x) for %s", model_name)
activate_transformers_for_subprocess(model_name)


def run_training_process(
Expand Down Expand Up @@ -386,25 +364,22 @@ def run_training_process(
)
return

# ── 1a. Auto-enable trust_remote_code for unsloth/* transformers 5.x models ──
# Some newer architectures (e.g. NemotronH) have config parsing bugs in
# transformers that require trust_remote_code=True as a workaround.
# Only auto-enable for unsloth/* prefixed models (trusted source).
# Exclude Gemma 4 since it is a native transformers 5.5 model and
# trust_remote_code=True would bypass the compiler (disabling fused CE).
from utils.transformers_version import needs_transformers_5

# ── 1a. Auto-enable trust_remote_code for NemotronH/Nano models ──
# NemotronH has config parsing bugs in transformers that require
# trust_remote_code=True as a workaround. Other transformers 5.x models
# (Qwen3.5, Gemma 4, etc.) are native and do NOT need it — enabling it
# bypasses the compiler (disabling fused CE).
# NOTE: Must NOT match Llama-Nemotron (standard Llama architecture).
_NEMOTRON_TRUST_SUBSTRINGS = ("nemotron_h", "nemotron-h", "nemotron-3-nano")
_lowered = model_name.lower()
_is_native_t5 = any(x in _lowered for x in ("gemma-4", "gemma4"))
if (
needs_transformers_5(model_name)
and _lowered.startswith("unsloth/")
and not _is_native_t5
any(sub in _lowered for sub in _NEMOTRON_TRUST_SUBSTRINGS)
and (_lowered.startswith("unsloth/") or _lowered.startswith("nvidia/"))
and not config.get("trust_remote_code", False)
):
config["trust_remote_code"] = True
logger.info(
"Auto-enabled trust_remote_code for unsloth/* transformers 5.x model: %s",
"Auto-enabled trust_remote_code for Nemotron model: %s",
model_name,
)

Expand Down
148 changes: 148 additions & 0 deletions studio/backend/tests/test_transformers_version.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,8 +31,11 @@
from utils.transformers_version import (
_resolve_base_model,
_check_tokenizer_config_needs_v5,
_check_config_needs_550,
_tokenizer_class_cache,
_config_needs_550_cache,
needs_transformers_5,
get_transformers_tier,
)


Expand Down Expand Up @@ -188,3 +191,148 @@ def test_local_checkpoint_resolved_via_config(self, tmp_path: Path):
# We test the full resolution chain here:
resolved = _resolve_base_model(str(tmp_path))
assert needs_transformers_5(resolved) is True


# ---------------------------------------------------------------------------
# _check_config_needs_550 — config.json architecture/model_type check
# ---------------------------------------------------------------------------


class TestCheckConfigNeeds550:
    """Exercise _check_config_needs_550() against on-disk config.json files."""

    @staticmethod
    def _write_config(directory: Path, payload: dict) -> str:
        """Dump *payload* to ``directory/config.json`` and return the directory as a str."""
        config_file = directory / "config.json"
        config_file.write_text(json.dumps(payload))
        return str(directory)

    def setup_method(self):
        # Start every test from an empty cache so earlier results cannot leak in.
        _config_needs_550_cache.clear()

    def test_gemma4_architecture(self, tmp_path: Path):
        """A Gemma4ForConditionalGeneration architecture entry yields True."""
        checkpoint = self._write_config(
            tmp_path,
            {
                "architectures": ["Gemma4ForConditionalGeneration"],
                "model_type": "gemma4",
            },
        )

        needs_550 = _check_config_needs_550(checkpoint)
        assert needs_550 is True

    def test_gemma4_model_type_only(self, tmp_path: Path):
        """model_type=gemma4 alone (no architectures key) yields True."""
        checkpoint = self._write_config(tmp_path, {"model_type": "gemma4"})

        needs_550 = _check_config_needs_550(checkpoint)
        assert needs_550 is True

    def test_llama_architecture(self, tmp_path: Path):
        """A LlamaForCausalLM config yields False."""
        checkpoint = self._write_config(
            tmp_path,
            {"architectures": ["LlamaForCausalLM"], "model_type": "llama"},
        )

        needs_550 = _check_config_needs_550(checkpoint)
        assert needs_550 is False

    def test_no_config_json(self, tmp_path: Path):
        """No config.json on disk and a failing urlopen yields False (fail-open)."""
        # urlopen is stubbed to raise so that no real fetch can take place.
        with patch("urllib.request.urlopen", side_effect = Exception("no network")):
            assert _check_config_needs_550(str(tmp_path)) is False

    def test_result_is_cached(self, tmp_path: Path):
        """After one call the result is stored in the cache under the path key."""
        key = self._write_config(
            tmp_path,
            {"architectures": ["Gemma4ForConditionalGeneration"]},
        )

        _check_config_needs_550(key)

        cache = _config_needs_550_cache
        assert key in cache
        assert cache[key] is True

    def test_local_file_skips_network(self, tmp_path: Path):
        """A local config.json must be enough: urlopen is never invoked."""
        checkpoint = self._write_config(
            tmp_path,
            {"architectures": ["LlamaForCausalLM"]},
        )

        with patch("urllib.request.urlopen") as fake_urlopen:
            _check_config_needs_550(checkpoint)

        # The mock keeps its call record after the patch is undone.
        fake_urlopen.assert_not_called()


# ---------------------------------------------------------------------------
# get_transformers_tier — tier detection
# ---------------------------------------------------------------------------


class TestGetTransformersTier:
    """Exercise get_transformers_tier() and needs_transformers_5() tier results."""

    # Dotted paths of the helpers that individual tests stub out.
    _CONFIG_CHECK = "utils.transformers_version._check_config_needs_550"
    _TOKENIZER_CHECK = "utils.transformers_version._check_tokenizer_config_needs_v5"

    def _config_says_not_550(self):
        """Return a patcher that forces _check_config_needs_550 to report False."""
        return patch(self._CONFIG_CHECK, return_value = False)

    def _tokenizer_says_not_v5(self):
        """Return a patcher that forces _check_tokenizer_config_needs_v5 to report False."""
        return patch(self._TOKENIZER_CHECK, return_value = False)

    def setup_method(self):
        # Both module-level caches are emptied so tests stay independent.
        _tokenizer_class_cache.clear()
        _config_needs_550_cache.clear()

    def test_gemma4_substring_returns_550(self):
        tier = get_transformers_tier("google/gemma-4-E2B-it")
        assert tier == "550"

    def test_gemma4_alt_substring_returns_550(self):
        tier = get_transformers_tier("unsloth/gemma4-E4B-it")
        assert tier == "550"

    def test_gemma4_config_json_returns_550(self, tmp_path: Path):
        """A local checkpoint whose config.json declares Gemma4 resolves to 550."""
        payload = {
            "architectures": ["Gemma4ForConditionalGeneration"],
            "model_type": "gemma4",
        }
        config_file = tmp_path / "config.json"
        config_file.write_text(json.dumps(payload))

        tier = get_transformers_tier(str(tmp_path))
        assert tier == "550"

    def test_qwen35_returns_530(self):
        with self._config_says_not_550():
            tier = get_transformers_tier("Qwen/Qwen3.5-9B")
            assert tier == "530"

    def test_ministral_returns_530(self):
        with self._config_says_not_550():
            tier = get_transformers_tier("mistralai/Ministral-3-8B-Instruct-2512")
            assert tier == "530"

    def test_llama_returns_default(self):
        with self._config_says_not_550(), self._tokenizer_says_not_v5():
            tier = get_transformers_tier("meta-llama/Llama-3-8B")
            assert tier == "default"

    def test_550_checked_before_530(self):
        """The 5.5.0 tier is meant to win: a gemma-4 name must resolve to 550."""
        # An overlap is not expected in practice; this pins the priority order.
        tier = get_transformers_tier("gemma-4-model")
        assert tier == "550"

    def test_needs_transformers_5_compat(self):
        """needs_transformers_5 is True for 530 and 550 models, False otherwise."""
        # 550-tier name, no stubs in place.
        assert needs_transformers_5("google/gemma-4-E2B-it") is True

        # 530-tier name with the config.json check stubbed to False.
        with self._config_says_not_550():
            assert needs_transformers_5("Qwen/Qwen3.5-9B") is True

        # Default-tier name with both checks stubbed to False.
        with self._config_says_not_550(), self._tokenizer_says_not_v5():
            assert needs_transformers_5("meta-llama/Llama-3-8B") is False
5 changes: 3 additions & 2 deletions studio/backend/utils/models/model_config.py
Original file line number Diff line number Diff line change
Expand Up @@ -493,8 +493,9 @@ def load_model_config(
"minicpmv",
}

# Pre-computed .venv_t5 path and backend dir for subprocess version switching.
_VENV_T5_DIR = str(Path.home() / ".unsloth" / "studio" / ".venv_t5")
# Pre-computed .venv_t5_550 path and backend dir for subprocess version switching.
# The vision check uses the 5.5.0 tier (newest; recognizes all architectures).
_VENV_T5_DIR = str(Path.home() / ".unsloth" / "studio" / ".venv_t5_550")
Comment thread
rolandtannous marked this conversation as resolved.
_BACKEND_DIR = str(Path(__file__).resolve().parent.parent.parent)

# Inline script executed in a subprocess with transformers 5.x activated.
Expand Down
Loading
Loading