From 970219a3035d3cc67e9953204f5a3ab58924bbf0 Mon Sep 17 00:00:00 2001 From: Roland Tannous Date: Mon, 6 Apr 2026 16:25:07 +0000 Subject: [PATCH 01/17] split venv_t5 into venv_t5_530 and venv_t5_550 for tiered transformers 5.x support --- studio/backend/core/export/worker.py | 39 ++- studio/backend/core/inference/worker.py | 39 ++- studio/backend/core/training/worker.py | 47 ++- .../tests/test_transformers_version.py | 134 ++++++++ studio/backend/utils/models/model_config.py | 5 +- studio/backend/utils/transformers_version.py | 294 ++++++++++++++---- studio/setup.ps1 | 74 ++++- studio/setup.sh | 37 ++- 8 files changed, 534 insertions(+), 135 deletions(-) diff --git a/studio/backend/core/export/worker.py b/studio/backend/core/export/worker.py index 6af6ff1193..4bdfd13b78 100644 --- a/studio/backend/core/export/worker.py +++ b/studio/backend/core/export/worker.py @@ -32,8 +32,8 @@ def _activate_transformers_version(model_name: str) -> None: """Activate the correct transformers version BEFORE any ML imports. - If the model needs transformers 5.x, prepend the pre-installed .venv_t5/ - directory to sys.path. Otherwise do nothing (default 4.57.x in .venv/). + Uses get_transformers_tier() to decide between .venv_t5_550/ (5.5.0), + .venv_t5_530/ (5.3.0), or the default 4.57.x. """ # Ensure backend is on path for utils imports backend_path = str(Path(__file__).resolve().parent.parent.parent) @@ -41,24 +41,37 @@ def _activate_transformers_version(model_name: str) -> None: sys.path.insert(0, backend_path) from utils.transformers_version import ( - needs_transformers_5, + get_transformers_tier, _resolve_base_model, - _ensure_venv_t5_exists, - _VENV_T5_DIR, + _ensure_venv_t5_530_exists, + _ensure_venv_t5_550_exists, + _VENV_T5_530_DIR, + _VENV_T5_550_DIR, ) resolved = _resolve_base_model(model_name) - if needs_transformers_5(resolved): - if not _ensure_venv_t5_exists(): + tier = get_transformers_tier(resolved) + + if tier == "550": + if not _ensure_venv_t5_550_exists(): + raise RuntimeError( + f"Cannot activate transformers 5.5.0: .venv_t5_550 missing at {_VENV_T5_550_DIR}" + ) + if _VENV_T5_550_DIR not in sys.path: + sys.path.insert(0, _VENV_T5_550_DIR) + logger.info("Activated transformers 5.5.0 from %s", _VENV_T5_550_DIR) + _pp = os.environ.get("PYTHONPATH", "") + os.environ["PYTHONPATH"] = _VENV_T5_550_DIR + (os.pathsep + _pp if _pp else "") + elif tier == "530": + if not _ensure_venv_t5_530_exists(): raise RuntimeError( - f"Cannot activate transformers 5.x: .venv_t5 missing at {_VENV_T5_DIR}" + f"Cannot activate transformers 5.3.0: .venv_t5_530 missing at {_VENV_T5_530_DIR}" ) - if _VENV_T5_DIR not in sys.path: - sys.path.insert(0, _VENV_T5_DIR) - logger.info("Activated transformers 5.x from %s", _VENV_T5_DIR) - # Propagate to child subprocesses (e.g. GGUF converter) + if _VENV_T5_530_DIR not in sys.path: + sys.path.insert(0, _VENV_T5_530_DIR) + logger.info("Activated transformers 5.3.0 from %s", _VENV_T5_530_DIR) _pp = os.environ.get("PYTHONPATH", "") - os.environ["PYTHONPATH"] = _VENV_T5_DIR + (os.pathsep + _pp if _pp else "") + os.environ["PYTHONPATH"] = _VENV_T5_530_DIR + (os.pathsep + _pp if _pp else "") else: logger.info("Using default transformers (4.57.x) for %s", model_name) diff --git a/studio/backend/core/inference/worker.py b/studio/backend/core/inference/worker.py index 7f7291a56d..3c5363cb77 100644 --- a/studio/backend/core/inference/worker.py +++ b/studio/backend/core/inference/worker.py @@ -36,8 +36,8 @@ def _activate_transformers_version(model_name: str) -> None: """Activate the correct transformers version BEFORE any ML imports. - If the model needs transformers 5.x, prepend the pre-installed .venv_t5/ - directory to sys.path. Otherwise do nothing (default 4.57.x in .venv/). + Uses get_transformers_tier() to decide between .venv_t5_550/ (5.5.0), + .venv_t5_530/ (5.3.0), or the default 4.57.x. """ # Ensure backend is on path for utils imports backend_path = str(Path(__file__).resolve().parent.parent.parent) @@ -45,24 +45,37 @@ def _activate_transformers_version(model_name: str) -> None: sys.path.insert(0, backend_path) from utils.transformers_version import ( - needs_transformers_5, + get_transformers_tier, _resolve_base_model, - _ensure_venv_t5_exists, - _VENV_T5_DIR, + _ensure_venv_t5_530_exists, + _ensure_venv_t5_550_exists, + _VENV_T5_530_DIR, + _VENV_T5_550_DIR, ) resolved = _resolve_base_model(model_name) - if needs_transformers_5(resolved): - if not _ensure_venv_t5_exists(): + tier = get_transformers_tier(resolved) + + if tier == "550": + if not _ensure_venv_t5_550_exists(): + raise RuntimeError( + f"Cannot activate transformers 5.5.0: .venv_t5_550 missing at {_VENV_T5_550_DIR}" + ) + if _VENV_T5_550_DIR not in sys.path: + sys.path.insert(0, _VENV_T5_550_DIR) + logger.info("Activated transformers 5.5.0 from %s", _VENV_T5_550_DIR) + _pp = os.environ.get("PYTHONPATH", "") + os.environ["PYTHONPATH"] = _VENV_T5_550_DIR + (os.pathsep + _pp if _pp else "") + elif tier == "530": + if not _ensure_venv_t5_530_exists(): raise RuntimeError( - f"Cannot activate transformers 5.x: .venv_t5 missing at {_VENV_T5_DIR}" + f"Cannot activate transformers 5.3.0: .venv_t5_530 missing at {_VENV_T5_530_DIR}" ) - if _VENV_T5_DIR not in sys.path: - sys.path.insert(0, _VENV_T5_DIR) - logger.info("Activated transformers 5.x from %s", _VENV_T5_DIR) - # Propagate to child subprocesses (e.g. GGUF converter) + if _VENV_T5_530_DIR not in sys.path: + sys.path.insert(0, _VENV_T5_530_DIR) + logger.info("Activated transformers 5.3.0 from %s", _VENV_T5_530_DIR) _pp = os.environ.get("PYTHONPATH", "") - os.environ["PYTHONPATH"] = _VENV_T5_DIR + (os.pathsep + _pp if _pp else "") + os.environ["PYTHONPATH"] = _VENV_T5_530_DIR + (os.pathsep + _pp if _pp else "") else: logger.info("Using default transformers (4.57.x) for %s", model_name) diff --git a/studio/backend/core/training/worker.py b/studio/backend/core/training/worker.py index 0454eada89..db5cf8e948 100644 --- a/studio/backend/core/training/worker.py +++ b/studio/backend/core/training/worker.py @@ -308,8 +308,8 @@ def _ensure_mamba_ssm(event_queue: Any, model_name: str) -> None: def _activate_transformers_version(model_name: str) -> None: """Activate the correct transformers version BEFORE any ML imports. - If the model needs transformers 5.x, prepend the pre-installed .venv_t5/ - directory to sys.path. Otherwise do nothing (default 4.57.x in .venv/). + Uses get_transformers_tier() to decide between .venv_t5_550/ (5.5.0), + .venv_t5_530/ (5.3.0), or the default 4.57.x. """ # Ensure backend is on path for utils imports backend_path = str(Path(__file__).resolve().parent.parent.parent) @@ -317,24 +317,37 @@ def _activate_transformers_version(model_name: str) -> None: sys.path.insert(0, backend_path) from utils.transformers_version import ( - needs_transformers_5, + get_transformers_tier, _resolve_base_model, - _ensure_venv_t5_exists, - _VENV_T5_DIR, + _ensure_venv_t5_530_exists, + _ensure_venv_t5_550_exists, + _VENV_T5_530_DIR, + _VENV_T5_550_DIR, ) resolved = _resolve_base_model(model_name) - if needs_transformers_5(resolved): - if not _ensure_venv_t5_exists(): + tier = get_transformers_tier(resolved) + + if tier == "550": + if not _ensure_venv_t5_550_exists(): + raise RuntimeError( + f"Cannot activate transformers 5.5.0: .venv_t5_550 missing at {_VENV_T5_550_DIR}" + ) + if _VENV_T5_550_DIR not in sys.path: + sys.path.insert(0, _VENV_T5_550_DIR) + logger.info("Activated transformers 5.5.0 from %s", _VENV_T5_550_DIR) + _pp = os.environ.get("PYTHONPATH", "") + os.environ["PYTHONPATH"] = _VENV_T5_550_DIR + (os.pathsep + _pp if _pp else "") + elif tier == "530": + if not _ensure_venv_t5_530_exists(): raise RuntimeError( - f"Cannot activate transformers 5.x: .venv_t5 missing at {_VENV_T5_DIR}" + f"Cannot activate transformers 5.3.0: .venv_t5_530 missing at {_VENV_T5_530_DIR}" ) - if _VENV_T5_DIR not in sys.path: - sys.path.insert(0, _VENV_T5_DIR) - logger.info("Activated transformers 5.x from %s", _VENV_T5_DIR) - # Propagate to child subprocesses (e.g. GGUF converter) + if _VENV_T5_530_DIR not in sys.path: + sys.path.insert(0, _VENV_T5_530_DIR) + logger.info("Activated transformers 5.3.0 from %s", _VENV_T5_530_DIR) _pp = os.environ.get("PYTHONPATH", "") - os.environ["PYTHONPATH"] = _VENV_T5_DIR + (os.pathsep + _pp if _pp else "") + os.environ["PYTHONPATH"] = _VENV_T5_530_DIR + (os.pathsep + _pp if _pp else "") else: logger.info("Using default transformers (4.57.x) for %s", model_name) @@ -392,14 +405,14 @@ def run_training_process( # Only auto-enable for unsloth/* prefixed models (trusted source). # Exclude Gemma 4 since it is a native transformers 5.5 model and # trust_remote_code=True would bypass the compiler (disabling fused CE). - from utils.transformers_version import needs_transformers_5 + from utils.transformers_version import get_transformers_tier _lowered = model_name.lower() - _is_native_t5 = any(x in _lowered for x in ("gemma-4", "gemma4")) + _tier = get_transformers_tier(model_name) if ( - needs_transformers_5(model_name) + _tier != "default" and _lowered.startswith("unsloth/") - and not _is_native_t5 + and _tier != "550" # Gemma 4 is native t5.5 — trust_remote_code bypasses compiler and not config.get("trust_remote_code", False) ): config["trust_remote_code"] = True diff --git a/studio/backend/tests/test_transformers_version.py b/studio/backend/tests/test_transformers_version.py index f3dae537c7..387c1c6f13 100644 --- a/studio/backend/tests/test_transformers_version.py +++ b/studio/backend/tests/test_transformers_version.py @@ -31,8 +31,11 @@ from utils.transformers_version import ( _resolve_base_model, _check_tokenizer_config_needs_v5, + _check_config_needs_550, _tokenizer_class_cache, + _config_needs_550_cache, needs_transformers_5, + get_transformers_tier, ) @@ -188,3 +191,134 @@ def test_local_checkpoint_resolved_via_config(self, tmp_path: Path): # We test the full resolution chain here: resolved = _resolve_base_model(str(tmp_path)) assert needs_transformers_5(resolved) is True + + +# --------------------------------------------------------------------------- +# _check_config_needs_550 — config.json architecture/model_type check +# --------------------------------------------------------------------------- + + +class TestCheckConfigNeeds550: + """Tests for _check_config_needs_550() local config.json checks.""" + + def setup_method(self): + _config_needs_550_cache.clear() + + def test_gemma4_architecture(self, tmp_path: Path): + """config.json with Gemma4ForConditionalGeneration should return True.""" + cfg = {"architectures": ["Gemma4ForConditionalGeneration"], "model_type": "gemma4"} + (tmp_path / "config.json").write_text(json.dumps(cfg)) + + assert _check_config_needs_550(str(tmp_path)) is True + + def test_gemma4_model_type_only(self, tmp_path: Path): + """config.json with model_type=gemma4 (no architectures) should return True.""" + cfg = {"model_type": "gemma4"} + (tmp_path / "config.json").write_text(json.dumps(cfg)) + + assert _check_config_needs_550(str(tmp_path)) is True + + def test_llama_architecture(self, tmp_path: Path): + """config.json with LlamaForCausalLM should return False.""" + cfg = {"architectures": ["LlamaForCausalLM"], "model_type": "llama"} + (tmp_path / "config.json").write_text(json.dumps(cfg)) + + assert _check_config_needs_550(str(tmp_path)) is False + + def test_no_config_json(self, tmp_path: Path): + """Missing config.json should return False (fail-open).""" + # Patch network call to avoid real fetch + with patch("urllib.request.urlopen") as mock_urlopen: + mock_urlopen.side_effect = Exception("no network") + assert _check_config_needs_550(str(tmp_path)) is False + + def test_result_is_cached(self, tmp_path: Path): + """Subsequent calls should use the cache.""" + cfg = {"architectures": ["Gemma4ForConditionalGeneration"]} + (tmp_path / "config.json").write_text(json.dumps(cfg)) + + key = str(tmp_path) + _check_config_needs_550(key) + assert key in _config_needs_550_cache + assert _config_needs_550_cache[key] is True + + def test_local_file_skips_network(self, tmp_path: Path): + """When local config.json exists, no network request should be made.""" + cfg = {"architectures": ["LlamaForCausalLM"]} + (tmp_path / "config.json").write_text(json.dumps(cfg)) + + with patch("urllib.request.urlopen") as mock_urlopen: + _check_config_needs_550(str(tmp_path)) + mock_urlopen.assert_not_called() + + +# --------------------------------------------------------------------------- +# get_transformers_tier — tier detection +# --------------------------------------------------------------------------- + + +class TestGetTransformersTier: + """Tests for get_transformers_tier() tiered version detection.""" + + def setup_method(self): + _tokenizer_class_cache.clear() + _config_needs_550_cache.clear() + + def test_gemma4_substring_returns_550(self): + assert get_transformers_tier("google/gemma-4-E2B-it") == "550" + + def test_gemma4_alt_substring_returns_550(self): + assert get_transformers_tier("unsloth/gemma4-E4B-it") == "550" + + def test_gemma4_config_json_returns_550(self, tmp_path: Path): + """Local checkpoint with Gemma4 architecture → 550.""" + cfg = {"architectures": ["Gemma4ForConditionalGeneration"], "model_type": "gemma4"} + (tmp_path / "config.json").write_text(json.dumps(cfg)) + + assert get_transformers_tier(str(tmp_path)) == "550" + + def test_qwen35_returns_530(self): + with patch( + "utils.transformers_version._check_config_needs_550", + return_value = False, + ): + assert get_transformers_tier("Qwen/Qwen3.5-9B") == "530" + + def test_ministral_returns_530(self): + with patch( + "utils.transformers_version._check_config_needs_550", + return_value = False, + ): + assert get_transformers_tier("mistralai/Ministral-3-8B-Instruct-2512") == "530" + + def test_llama_returns_default(self): + with patch( + "utils.transformers_version._check_config_needs_550", + return_value = False, + ), patch( + "utils.transformers_version._check_tokenizer_config_needs_v5", + return_value = False, + ): + assert get_transformers_tier("meta-llama/Llama-3-8B") == "default" + + def test_550_checked_before_530(self): + """Ensure 5.5.0 is checked first — a model matching both should get 550.""" + # This shouldn't happen in practice, but verifies priority + assert get_transformers_tier("gemma-4-model") == "550" + + def test_needs_transformers_5_compat(self): + """needs_transformers_5 should return True for both 530 and 550 models.""" + assert needs_transformers_5("google/gemma-4-E2B-it") is True + with patch( + "utils.transformers_version._check_config_needs_550", + return_value = False, + ): + assert needs_transformers_5("Qwen/Qwen3.5-9B") is True + with patch( + "utils.transformers_version._check_config_needs_550", + return_value = False, + ), patch( + "utils.transformers_version._check_tokenizer_config_needs_v5", + return_value = False, + ): + assert needs_transformers_5("meta-llama/Llama-3-8B") is False diff --git a/studio/backend/utils/models/model_config.py b/studio/backend/utils/models/model_config.py index 61226e52cb..5be0b183d5 100644 --- a/studio/backend/utils/models/model_config.py +++ b/studio/backend/utils/models/model_config.py @@ -493,8 +493,9 @@ def load_model_config( "minicpmv", } -# Pre-computed .venv_t5 path and backend dir for subprocess version switching. -_VENV_T5_DIR = str(Path.home() / ".unsloth" / "studio" / ".venv_t5") +# Pre-computed .venv_t5 paths and backend dir for subprocess version switching. +# Vision check uses 5.5.0 (newest, recognizes all architectures). +_VENV_T5_DIR = str(Path.home() / ".unsloth" / "studio" / ".venv_t5_550") _BACKEND_DIR = str(Path(__file__).resolve().parent.parent.parent) # Inline script executed in a subprocess with transformers 5.x activated. diff --git a/studio/backend/utils/transformers_version.py b/studio/backend/utils/transformers_version.py index 07e4a5c000..a9a026895b 100644 --- a/studio/backend/utils/transformers_version.py +++ b/studio/backend/utils/transformers_version.py @@ -5,20 +5,25 @@ Automatic transformers version switching. Some newer model architectures (Ministral-3, GLM-4.7-Flash, Qwen3-30B-A3B MoE, -tiny_qwen3_moe) require transformers>=5.3.0, while everything else needs the -default 4.57.x that ships with Unsloth. +tiny_qwen3_moe) require transformers>=5.3.0, while Gemma 4 models require +transformers>=5.5.0. Everything else needs the default 4.57.x that ships +with Unsloth. + +Two separate target directories are maintained: + - .venv_t5_530/ — transformers 5.3.0 (Ministral-3, GLM, Qwen3 MoE, etc.) + - .venv_t5_550/ — transformers 5.5.0 (Gemma 4) When loading a LoRA adapter with a custom name, we resolve the base model from ``adapter_config.json`` and check *that* against the model list. Strategy: Training and inference run in subprocesses that activate the correct version - via sys.path (prepending .venv_t5/ for 5.x models). See: + via sys.path (prepending the appropriate .venv_t5_*/ directory). See: - core/training/worker.py - core/inference/worker.py For export (still in-process), ensure_transformers_version() does a lightweight - sys.path swap using the same .venv_t5/ directory pre-installed by setup.sh. + sys.path swap using the same directories pre-installed by setup.sh. """ import importlib @@ -39,7 +44,7 @@ # --------------------------------------------------------------------------- # Lowercase substrings — if ANY appears anywhere in the lowered model name, -# we need transformers 5.x. +# we need transformers 5.3.0. TRANSFORMERS_5_MODEL_SUBSTRINGS: tuple[str, ...] = ( "ministral-3-", # Ministral-3-{3,8,14}B-{Instruct,Reasoning,Base}-2512 "glm-4.7-flash", # GLM-4.7-Flash @@ -47,10 +52,23 @@ "qwen3.5", # Qwen3.5 family (35B-A3B, etc.) "qwen3-next", # Qwen3-Next and variants "tiny_qwen3_moe", # imdatta0/tiny_qwen3_moe_2.8B_0.7B +) + +# Lowercase substrings for models that require transformers 5.5.0 (checked first). +TRANSFORMERS_550_MODEL_SUBSTRINGS: tuple[str, ...] = ( "gemma-4", # Gemma-4 (E2B-it, E4B-it, 31B-it, 26B-A4B-it) "gemma4", # Gemma-4 alternate naming ) +# Architecture classes / model_type values that require transformers 5.5.0. +# Checked via config.json (local or HuggingFace). +_TRANSFORMERS_550_ARCHITECTURES: set[str] = { + "Gemma4ForConditionalGeneration", +} +_TRANSFORMERS_550_MODEL_TYPES: set[str] = { + "gemma4", +} + # Tokenizer classes that only exist in transformers>=5.x _TRANSFORMERS_5_TOKENIZER_CLASSES: set[str] = { "TokenizersBackend", @@ -59,12 +77,21 @@ # Cache for dynamic tokenizer_config.json lookups to avoid repeated fetches _tokenizer_class_cache: dict[str, bool] = {} +# Cache for dynamic config.json lookups (architecture/model_type checks) +_config_needs_550_cache: dict[str, bool] = {} + # Versions -TRANSFORMERS_5_VERSION = "5.5.0" +TRANSFORMERS_550_VERSION = "5.5.0" +TRANSFORMERS_530_VERSION = "5.3.0" TRANSFORMERS_DEFAULT_VERSION = "4.57.6" +# Backwards-compat alias used by other modules +TRANSFORMERS_5_VERSION = TRANSFORMERS_550_VERSION -# Pre-installed directory for transformers 5.x — created by setup.sh / setup.ps1 -_VENV_T5_DIR = str(Path.home() / ".unsloth" / "studio" / ".venv_t5") +# Pre-installed directories — created by setup.sh / setup.ps1 +_VENV_T5_530_DIR = str(Path.home() / ".unsloth" / "studio" / ".venv_t5_530") +_VENV_T5_550_DIR = str(Path.home() / ".unsloth" / "studio" / ".venv_t5_550") +# Backwards-compat alias +_VENV_T5_DIR = _VENV_T5_550_DIR def _resolve_base_model(model_name: str) -> str: @@ -192,18 +219,105 @@ def _check_tokenizer_config_needs_v5(model_name: str) -> bool: return False -def needs_transformers_5(model_name: str) -> bool: - """Return True if *model_name* belongs to an architecture that requires - ``transformers>=5.3.0``. +def _check_config_needs_550(model_name: str) -> bool: + """Check ``config.json`` for architectures or model_type that require + transformers 5.5.0 (e.g. Gemma 4). - First checks the hardcoded substring list for known models, then - dynamically fetches ``tokenizer_config.json`` from HuggingFace to check - if the tokenizer_class (e.g. ``TokenizersBackend``) requires v5. + Checks locally first, then falls back to fetching from HuggingFace. + Results are cached in ``_config_needs_550_cache``. + Returns False on any error (fail-open to lower tier). + """ + if model_name in _config_needs_550_cache: + return _config_needs_550_cache[model_name] + + def _check_cfg(cfg: dict) -> bool: + archs = cfg.get("architectures", []) + if any(a in _TRANSFORMERS_550_ARCHITECTURES for a in archs): + return True + if cfg.get("model_type") in _TRANSFORMERS_550_MODEL_TYPES: + return True + return False + + # --- Check local config.json first ------------------------------------ + local_path = Path(model_name) + local_cfg = local_path / "config.json" + if local_cfg.is_file(): + try: + with open(local_cfg) as f: + cfg = json.load(f) + result = _check_cfg(cfg) + if result: + logger.info( + "Local config.json check: %s needs transformers 5.5.0 " + "(architectures=%s, model_type=%s)", + model_name, + cfg.get("architectures", []), + cfg.get("model_type"), + ) + _config_needs_550_cache[model_name] = result + return result + except Exception as exc: + logger.debug("Could not read %s: %s", local_cfg, exc) + + # --- Fall back to fetching from HuggingFace --------------------------- + import urllib.request + + url = f"https://huggingface.co/{model_name}/raw/main/config.json" + try: + req = urllib.request.Request(url, headers = {"User-Agent": "unsloth-studio"}) + with urllib.request.urlopen(req, timeout = 10) as resp: + cfg = json.loads(resp.read().decode()) + result = _check_cfg(cfg) + if result: + logger.info( + "Dynamic config.json check: %s needs transformers 5.5.0 " + "(architectures=%s, model_type=%s)", + model_name, + cfg.get("architectures", []), + cfg.get("model_type"), + ) + _config_needs_550_cache[model_name] = result + return result + except Exception as exc: + logger.debug( + "Could not fetch config.json for '%s': %s", model_name, exc + ) + _config_needs_550_cache[model_name] = False + return False + + +def get_transformers_tier(model_name: str) -> str: + """Return the transformers tier required for *model_name*. + + Returns ``"550"`` for models needing transformers 5.5.0 (e.g. Gemma 4), + ``"530"`` for models needing transformers 5.3.0 (e.g. Ministral-3, Qwen3 MoE), + or ``"default"`` for everything else (4.57.x). + + The 5.5.0 check runs first, then 5.3.0. """ lowered = model_name.lower() + + # --- Check 5.5.0 first ------------------------------------------------ + if any(sub in lowered for sub in TRANSFORMERS_550_MODEL_SUBSTRINGS): + return "550" + if _check_config_needs_550(model_name): + return "550" + + # --- Check 5.3.0 ------------------------------------------------------ if any(sub in lowered for sub in TRANSFORMERS_5_MODEL_SUBSTRINGS): - return True - return _check_tokenizer_config_needs_v5(model_name) + return "530" + if _check_tokenizer_config_needs_v5(model_name): + return "530" + + return "default" + + +def needs_transformers_5(model_name: str) -> bool: + """Return True if *model_name* requires any transformers 5.x version. + + Convenience wrapper around :func:`get_transformers_tier`. + """ + return get_transformers_tier(model_name) != "default" # --------------------------------------------------------------------------- @@ -258,27 +372,36 @@ def _purge_modules() -> int: return len(to_remove) -_VENV_T5_PACKAGES = ( - f"transformers=={TRANSFORMERS_5_VERSION}", +_VENV_T5_530_PACKAGES = ( + f"transformers=={TRANSFORMERS_530_VERSION}", + "huggingface_hub==1.8.0", + "hf_xet==1.4.2", + "tiktoken", +) + +_VENV_T5_550_PACKAGES = ( + f"transformers=={TRANSFORMERS_550_VERSION}", "huggingface_hub==1.8.0", "hf_xet==1.4.2", "tiktoken", ) +# Backwards-compat alias +_VENV_T5_PACKAGES = _VENV_T5_550_PACKAGES -def _venv_t5_is_valid() -> bool: - """Return True if .venv_t5/ has all required packages at the correct versions.""" - if not os.path.isdir(_VENV_T5_DIR) or not os.listdir(_VENV_T5_DIR): + +def _venv_dir_is_valid(venv_dir: str, packages: tuple[str, ...]) -> bool: + """Return True if *venv_dir* has all *packages* at the correct versions.""" + if not os.path.isdir(venv_dir) or not os.listdir(venv_dir): return False - # Check that the key package directories exist AND match the required version - for pkg_spec in _VENV_T5_PACKAGES: + for pkg_spec in packages: parts = pkg_spec.split("==") pkg_name = parts[0] pkg_version = parts[1] if len(parts) > 1 else None pkg_name_norm = pkg_name.replace("-", "_") # Check directory exists if not any( - (Path(_VENV_T5_DIR) / d).is_dir() + (Path(venv_dir) / d).is_dir() for d in (pkg_name_norm, pkg_name_norm.replace("_", "-")) ): return False @@ -287,7 +410,7 @@ def _venv_t5_is_valid() -> bool: continue # Check version via .dist-info metadata dist_info_found = False - for di in Path(_VENV_T5_DIR).glob(f"{pkg_name_norm}-*.dist-info"): + for di in Path(venv_dir).glob(f"{pkg_name_norm}-*.dist-info"): metadata = di / "METADATA" if not metadata.is_file(): continue @@ -296,7 +419,8 @@ def _venv_t5_is_valid() -> bool: installed_ver = line.split(":", 1)[1].strip() if installed_ver != pkg_version: logger.info( - ".venv_t5 has %s==%s but need %s", + "%s has %s==%s but need %s", + venv_dir, pkg_name, installed_ver, pkg_version, @@ -311,8 +435,13 @@ def _venv_t5_is_valid() -> bool: return True -def _install_to_venv_t5(pkg: str) -> bool: - """Install a single package into .venv_t5/, preferring uv then pip.""" +def _venv_t5_is_valid() -> bool: + """Backwards-compat: check the 5.5.0 venv.""" + return _venv_dir_is_valid(_VENV_T5_550_DIR, _VENV_T5_550_PACKAGES) + + +def _install_to_dir(pkg: str, target_dir: str) -> bool: + """Install a single package into *target_dir*, preferring uv then pip.""" # Try uv first (faster) if already on PATH -- do NOT install uv at runtime if shutil.which("uv"): result = subprocess.run( @@ -323,7 +452,7 @@ def _install_to_venv_t5(pkg: str) -> bool: "--python", sys.executable, "--target", - _VENV_T5_DIR, + target_dir, "--no-deps", "--upgrade", pkg, @@ -344,7 +473,7 @@ def _install_to_venv_t5(pkg: str) -> bool: "pip", "install", "--target", - _VENV_T5_DIR, + target_dir, "--no-deps", "--upgrade", pkg, @@ -359,47 +488,58 @@ def _install_to_venv_t5(pkg: str) -> bool: return True -def _ensure_venv_t5_exists() -> bool: - """Ensure .venv_t5/ exists with all required packages. Install if missing.""" - if _venv_t5_is_valid(): +def _ensure_venv_dir(venv_dir: str, packages: tuple[str, ...], label: str) -> bool: + """Ensure *venv_dir* exists with all *packages*. Install if missing.""" + if _venv_dir_is_valid(venv_dir, packages): return True logger.warning( - ".venv_t5 not found or incomplete at %s -- installing at runtime", _VENV_T5_DIR + "%s not found or incomplete at %s -- installing at runtime", label, venv_dir ) - shutil.rmtree(_VENV_T5_DIR, ignore_errors = True) - os.makedirs(_VENV_T5_DIR, exist_ok = True) - for pkg in _VENV_T5_PACKAGES: - if not _install_to_venv_t5(pkg): + shutil.rmtree(venv_dir, ignore_errors = True) + os.makedirs(venv_dir, exist_ok = True) + for pkg in packages: + if not _install_to_dir(pkg, venv_dir): return False - logger.info("Installed transformers 5.x to %s", _VENV_T5_DIR) + logger.info("Installed %s to %s", label, venv_dir) return True -def _activate_5x() -> None: - """Prepend .venv_t5/ to sys.path, purge stale modules, reimport.""" - if not _ensure_venv_t5_exists(): - raise RuntimeError( - f"Cannot activate transformers 5.x: .venv_t5 missing at {_VENV_T5_DIR}" - ) +def _ensure_venv_t5_530_exists() -> bool: + """Ensure .venv_t5_530/ exists with transformers 5.3.0.""" + return _ensure_venv_dir(_VENV_T5_530_DIR, _VENV_T5_530_PACKAGES, "transformers 5.3.0") + + +def _ensure_venv_t5_550_exists() -> bool: + """Ensure .venv_t5_550/ exists with transformers 5.5.0.""" + return _ensure_venv_dir(_VENV_T5_550_DIR, _VENV_T5_550_PACKAGES, "transformers 5.5.0") - if _VENV_T5_DIR not in sys.path: - sys.path.insert(0, _VENV_T5_DIR) - logger.info("Prepended %s to sys.path", _VENV_T5_DIR) + +def _ensure_venv_t5_exists() -> bool: + """Backwards-compat: ensure the 5.5.0 venv exists.""" + return _ensure_venv_t5_550_exists() + + +def _activate_venv(venv_dir: str, label: str) -> None: + """Prepend *venv_dir* to sys.path, purge stale modules, reimport.""" + if venv_dir not in sys.path: + sys.path.insert(0, venv_dir) + logger.info("Prepended %s to sys.path", venv_dir) count = _purge_modules() logger.info("Purged %d cached modules", count) import transformers - logger.info("Loaded transformers %s", transformers.__version__) + logger.info("Loaded transformers %s (%s)", transformers.__version__, label) def _deactivate_5x() -> None: - """Remove .venv_t5/ from sys.path, purge stale modules, reimport.""" - while _VENV_T5_DIR in sys.path: - sys.path.remove(_VENV_T5_DIR) - logger.info("Removed %s from sys.path", _VENV_T5_DIR) + """Remove all .venv_t5_*/ dirs from sys.path, purge stale modules, reimport.""" + for d in (_VENV_T5_530_DIR, _VENV_T5_550_DIR): + while d in sys.path: + sys.path.remove(d) + logger.info("Removed venv_t5 dirs from sys.path") count = _purge_modules() logger.info("Purged %d cached modules", count) @@ -412,9 +552,10 @@ def _deactivate_5x() -> None: def ensure_transformers_version(model_name: str) -> None: """Ensure the correct ``transformers`` version is active for *model_name*. - Uses sys.path with .venv_t5/ (pre-installed by setup.sh): - • Need 5.x → prepend .venv_t5/ to sys.path, purge modules. - • Need 4.x → remove .venv_t5/ from sys.path, purge modules. + Uses sys.path with .venv_t5_530/ or .venv_t5_550/ (pre-installed by setup.sh): + • Need 5.5.0 → prepend .venv_t5_550/ to sys.path, purge modules. + • Need 5.3.0 → prepend .venv_t5_530/ to sys.path, purge modules. + • Need 4.x → remove all .venv_t5_*/ from sys.path, purge modules. For LoRA adapters with custom names, the base model is resolved from ``adapter_config.json`` before checking. @@ -424,8 +565,21 @@ def ensure_transformers_version(model_name: str) -> None: """ # Resolve LoRA adapters to their base model for accurate detection resolved = _resolve_base_model(model_name) - want_5 = needs_transformers_5(resolved) - target_version = TRANSFORMERS_5_VERSION if want_5 else TRANSFORMERS_DEFAULT_VERSION + tier = get_transformers_tier(resolved) + + if tier == "550": + target_version = TRANSFORMERS_550_VERSION + venv_dir = _VENV_T5_550_DIR + ensure_fn = _ensure_venv_t5_550_exists + elif tier == "530": + target_version = TRANSFORMERS_530_VERSION + venv_dir = _VENV_T5_530_DIR + ensure_fn = _ensure_venv_t5_530_exists + else: + target_version = TRANSFORMERS_DEFAULT_VERSION + venv_dir = None + ensure_fn = None + target_major = int(target_version.split(".")[0]) # Check what's actually loaded in memory @@ -441,8 +595,17 @@ def ensure_transformers_version(model_name: str) -> None: # --- Already correct? --------------------------------------------------- if in_memory is not None: + if in_memory == target_version: + logger.info( + "transformers %s already loaded — correct for '%s'", + in_memory, + model_name, + ) + return + # Different 5.x → need to switch (e.g. 5.3.0 loaded but need 5.5.0) in_memory_major = int(in_memory.split(".")[0]) - if in_memory_major == target_major: + if in_memory_major == target_major and venv_dir is None: + # Both are default (4.x) — close enough logger.info( "transformers %s already loaded — correct for '%s'", in_memory, @@ -451,9 +614,16 @@ def ensure_transformers_version(model_name: str) -> None: return # --- Switch version ----------------------------------------------------- - if want_5: - logger.info("Activating transformers %s via .venv_t5…", TRANSFORMERS_5_VERSION) - _activate_5x() + if venv_dir is not None: + # First remove any other 5.x venv from sys.path + _deactivate_5x() + if not ensure_fn(): + raise RuntimeError( + f"Cannot activate transformers {target_version}: " + f"venv missing at {venv_dir}" + ) + logger.info("Activating transformers %s…", target_version) + _activate_venv(venv_dir, f"transformers {target_version}") else: logger.info( "Reverting to default transformers %s…", TRANSFORMERS_DEFAULT_VERSION diff --git a/studio/setup.ps1 b/studio/setup.ps1 index 60dfce0661..fa853cdadf 100644 --- a/studio/setup.ps1 +++ b/studio/setup.ps1 @@ -1579,48 +1579,88 @@ if ($stackExit -ne 0) { exit 1 } -# ── Pre-install transformers 5.x into .venv_t5/ ── -# Models like GLM-4.7-Flash need transformers>=5.3.0. Instead of pip-installing -# at runtime (slow, ~10-15s), we pre-install into a separate directory. -# The training subprocess just prepends .venv_t5/ to sys.path -- instant switch. +# ── Pre-install transformers 5.x into .venv_t5_530/ and .venv_t5_550/ ── +# Models like GLM-4.7-Flash, Qwen3 MoE need transformers>=5.3.0. +# Gemma 4 models need transformers>=5.5.0. +# Pre-install into separate directories to avoid runtime pip overhead. +# The training subprocess prepends the appropriate dir to sys.path. Write-Host "" -substep "pre-installing transformers 5.x for newer model support..." -$VenvT5Dir = Join-Path $env:USERPROFILE ".unsloth\studio\.venv_t5" -if (Test-Path $VenvT5Dir) { Remove-Item -Recurse -Force $VenvT5Dir } -New-Item -ItemType Directory -Path $VenvT5Dir -Force | Out-Null + +# Clean up legacy single .venv_t5 directory +$VenvT5Legacy = Join-Path $env:USERPROFILE ".unsloth\studio\.venv_t5" +if (Test-Path $VenvT5Legacy) { Remove-Item -Recurse -Force $VenvT5Legacy } + $prevEAP_t5 = $ErrorActionPreference $ErrorActionPreference = "Continue" + +# --- .venv_t5_530 (transformers 5.3.0) --- +substep "pre-installing transformers 5.3.0 for newer model support..." +$VenvT5_530Dir = Join-Path $env:USERPROFILE ".unsloth\studio\.venv_t5_530" +if (Test-Path $VenvT5_530Dir) { Remove-Item -Recurse -Force $VenvT5_530Dir } +New-Item -ItemType Directory -Path $VenvT5_530Dir -Force | Out-Null +foreach ($pkg in @("transformers==5.3.0", "huggingface_hub==1.8.0", "hf_xet==1.4.2")) { + if ($script:UnslothVerbose) { + Fast-Install --target $VenvT5_530Dir --no-deps $pkg + $t5PkgExit = $LASTEXITCODE + $output = "" + } else { + $output = Fast-Install --target $VenvT5_530Dir --no-deps $pkg | Out-String + $t5PkgExit = $LASTEXITCODE + } + if ($t5PkgExit -ne 0) { + Write-Host "[FAIL] Could not install $pkg into .venv_t5_530/" -ForegroundColor Red + Write-Host $output -ForegroundColor Red + $ErrorActionPreference = $prevEAP_t5 + exit 1 + } +} +if ($script:UnslothVerbose) { + Fast-Install --target $VenvT5_530Dir tiktoken + $tiktokenInstallExit = $LASTEXITCODE + $output = "" +} else { + $output = Fast-Install --target $VenvT5_530Dir tiktoken | Out-String + $tiktokenInstallExit = $LASTEXITCODE +} +if ($tiktokenInstallExit -ne 0) { + substep "Could not install tiktoken into .venv_t5_530/ -- Qwen tokenizers may fail" "Yellow" +} +step "transformers" "5.3.0 pre-installed" + +# --- .venv_t5_550 (transformers 5.5.0) --- +substep "pre-installing transformers 5.5.0 for Gemma 4 support..." +$VenvT5_550Dir = Join-Path $env:USERPROFILE ".unsloth\studio\.venv_t5_550" +if (Test-Path $VenvT5_550Dir) { Remove-Item -Recurse -Force $VenvT5_550Dir } +New-Item -ItemType Directory -Path $VenvT5_550Dir -Force | Out-Null foreach ($pkg in @("transformers==5.5.0", "huggingface_hub==1.8.0", "hf_xet==1.4.2")) { if ($script:UnslothVerbose) { - Fast-Install --target $VenvT5Dir --no-deps $pkg + Fast-Install --target $VenvT5_550Dir --no-deps $pkg $t5PkgExit = $LASTEXITCODE $output = "" } else { - $output = Fast-Install --target $VenvT5Dir --no-deps $pkg | Out-String + $output = Fast-Install --target $VenvT5_550Dir --no-deps $pkg | Out-String $t5PkgExit = $LASTEXITCODE } if ($t5PkgExit -ne 0) { - Write-Host "[FAIL] Could not install $pkg into .venv_t5/" -ForegroundColor Red + Write-Host "[FAIL] Could not install $pkg into .venv_t5_550/" -ForegroundColor Red Write-Host $output -ForegroundColor Red $ErrorActionPreference = $prevEAP_t5 exit 1 } } -# tiktoken is needed by Qwen-family tokenizers -- install with deps since -# regex/requests may be missing on Windows if ($script:UnslothVerbose) { - Fast-Install --target $VenvT5Dir tiktoken + Fast-Install --target $VenvT5_550Dir tiktoken $tiktokenInstallExit = $LASTEXITCODE $output = "" } else { - $output = Fast-Install --target $VenvT5Dir tiktoken | Out-String + $output = Fast-Install --target $VenvT5_550Dir tiktoken | Out-String $tiktokenInstallExit = $LASTEXITCODE } if ($tiktokenInstallExit -ne 0) { - substep "Could not install tiktoken into .venv_t5/ -- Qwen tokenizers may fail" "Yellow" + substep "Could not install tiktoken into .venv_t5_550/ -- Qwen tokenizers may fail" "Yellow" } $ErrorActionPreference = $prevEAP_t5 -step "transformers" "5.x pre-installed" +step "transformers" "5.5.0 pre-installed" } else { step "python" "dependencies up to date" diff --git a/studio/setup.sh b/studio/setup.sh index e3ff2da35c..72df82f601 100755 --- a/studio/setup.sh +++ b/studio/setup.sh @@ -394,11 +394,14 @@ fi # ── Python venv + deps ── STUDIO_HOME="$HOME/.unsloth/studio" VENV_DIR="$STUDIO_HOME/unsloth_studio" -VENV_T5_DIR="$STUDIO_HOME/.venv_t5" +VENV_T5_530_DIR="$STUDIO_HOME/.venv_t5_530" +VENV_T5_550_DIR="$STUDIO_HOME/.venv_t5_550" [ -d "$REPO_ROOT/.venv" ] && rm -rf "$REPO_ROOT/.venv" [ -d "$REPO_ROOT/.venv_overlay" ] && rm -rf "$REPO_ROOT/.venv_overlay" [ -d "$REPO_ROOT/.venv_t5" ] && rm -rf "$REPO_ROOT/.venv_t5" +[ -d "$REPO_ROOT/.venv_t5_530" ] && rm -rf "$REPO_ROOT/.venv_t5_530" +[ -d "$REPO_ROOT/.venv_t5_550" ] && rm -rf "$REPO_ROOT/.venv_t5_550" # Note: do NOT delete $STUDIO_HOME/.venv here — install.sh handles migration _COLAB_NO_VENV=false @@ -502,16 +505,28 @@ fi if [ "$_SKIP_PYTHON_DEPS" = false ]; then install_python_stack - # ── 6b. Pre-install transformers 5.x into .venv_t5/ ── - # Models like GLM-4.7-Flash need transformers>=5.3.0. Instead of pip-installing - # at runtime (slow, ~10-15s), we pre-install into a separate directory. - # The training subprocess just prepends .venv_t5/ to sys.path -- instant switch. - mkdir -p "$VENV_T5_DIR" - run_quiet "install transformers 5.x" fast_install --target "$VENV_T5_DIR" --no-deps "transformers==5.5.0" - run_quiet "install huggingface_hub for t5" fast_install --target "$VENV_T5_DIR" --no-deps "huggingface_hub==1.8.0" - run_quiet "install hf_xet for t5" fast_install --target "$VENV_T5_DIR" --no-deps "hf_xet==1.4.2" - run_quiet "install tiktoken for t5" fast_install --target "$VENV_T5_DIR" "tiktoken" - step "transformers" "5.x pre-installed" + # ── 6b. Pre-install transformers 5.x into .venv_t5_530/ and .venv_t5_550/ ── + # Models like GLM-4.7-Flash, Qwen3 MoE need transformers>=5.3.0. + # Gemma 4 models need transformers>=5.5.0. + # Pre-install into separate directories to avoid runtime pip overhead. + # The training subprocess prepends the appropriate dir to sys.path. + + # Clean up legacy single .venv_t5 directory + [ -d "$STUDIO_HOME/.venv_t5" ] && rm -rf "$STUDIO_HOME/.venv_t5" + + mkdir -p "$VENV_T5_530_DIR" + run_quiet "install transformers 5.3.0" fast_install --target "$VENV_T5_530_DIR" --no-deps "transformers==5.3.0" + run_quiet "install huggingface_hub for t5_530" fast_install --target "$VENV_T5_530_DIR" --no-deps "huggingface_hub==1.8.0" + run_quiet "install hf_xet for t5_530" fast_install --target "$VENV_T5_530_DIR" --no-deps "hf_xet==1.4.2" + run_quiet "install tiktoken for t5_530" fast_install --target "$VENV_T5_530_DIR" "tiktoken" + step "transformers" "5.3.0 pre-installed" + + mkdir -p "$VENV_T5_550_DIR" + run_quiet "install transformers 5.5.0" fast_install --target "$VENV_T5_550_DIR" --no-deps "transformers==5.5.0" + run_quiet "install huggingface_hub for t5_550" fast_install --target "$VENV_T5_550_DIR" --no-deps "huggingface_hub==1.8.0" + run_quiet "install hf_xet for t5_550" fast_install --target "$VENV_T5_550_DIR" --no-deps "hf_xet==1.4.2" + run_quiet "install tiktoken for t5_550" fast_install --target "$VENV_T5_550_DIR" "tiktoken" + step "transformers" "5.5.0 pre-installed" else step "python" "dependencies up to date" verbose_substep "python deps check: installed=$_PKG_NAME@${INSTALLED_VER:-unknown} latest=${LATEST_VER:-unknown}" From e8328c11de8c14f8fd361bf15d8a98a1cbef83fc Mon Sep 17 00:00:00 2001 From: Roland Tannous Date: Mon, 6 Apr 2026 17:28:55 +0000 Subject: [PATCH 02/17] fix bfloat16 crash on T4 for FORCE_FLOAT32 models and disable trust_remote_code auto-enable for native t5 models --- studio/backend/core/training/worker.py | 8 ++++---- unsloth/models/loader.py | 4 +++- 2 files changed, 7 insertions(+), 5 deletions(-) diff --git a/studio/backend/core/training/worker.py b/studio/backend/core/training/worker.py index db5cf8e948..9815e9d7b7 100644 --- a/studio/backend/core/training/worker.py +++ b/studio/backend/core/training/worker.py @@ -402,9 +402,9 @@ def run_training_process( # ── 1a. Auto-enable trust_remote_code for unsloth/* transformers 5.x models ── # Some newer architectures (e.g. NemotronH) have config parsing bugs in # transformers that require trust_remote_code=True as a workaround. - # Only auto-enable for unsloth/* prefixed models (trusted source). - # Exclude Gemma 4 since it is a native transformers 5.5 model and - # trust_remote_code=True would bypass the compiler (disabling fused CE). + # Only auto-enable for models that genuinely need it (set in YAML defaults). + # Native transformers 5.x models (Qwen3.5, Gemma 4, etc.) do NOT need it + # and enabling it can bypass the compiler (disabling fused CE). from utils.transformers_version import get_transformers_tier _lowered = model_name.lower() @@ -412,7 +412,7 @@ def run_training_process( if ( _tier != "default" and _lowered.startswith("unsloth/") - and _tier != "550" # Gemma 4 is native t5.5 — trust_remote_code bypasses compiler + and _tier not in ("530", "550") # Native t5 models don't need trust_remote_code and not config.get("trust_remote_code", False) ): config["trust_remote_code"] = True diff --git a/unsloth/models/loader.py b/unsloth/models/loader.py index df97c5c7df..a34744dba8 100644 --- a/unsloth/models/loader.py +++ b/unsloth/models/loader.py @@ -1357,7 +1357,9 @@ def from_pretrained( or disable_name.lower() in model_types_all ) and ((dtype == torch.float16) or not SUPPORTS_BFLOAT16): os.environ["UNSLOTH_FORCE_FLOAT32"] = "1" - dtype = torch.bfloat16 # Change to bfloat16 loading + # Use bfloat16 storage where supported; fall back to float32 on + # older GPUs (e.g. T4) that lack native bfloat16 support. + dtype = torch.bfloat16 if SUPPORTS_BFLOAT16 else torch.float32 break # Apply gradient checkpointing with smart heuristics use_gradient_checkpointing = apply_unsloth_gradient_checkpointing( From efd11915b8efdefcdea5bf372a4e2e3b19ce3417 Mon Sep 17 00:00:00 2001 From: Roland Tannous Date: Mon, 6 Apr 2026 17:29:22 +0000 Subject: [PATCH 03/17] revert FORCE_FLOAT32 dtype change --- unsloth/models/loader.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/unsloth/models/loader.py b/unsloth/models/loader.py index a34744dba8..df97c5c7df 100644 --- a/unsloth/models/loader.py +++ b/unsloth/models/loader.py @@ -1357,9 +1357,7 @@ def from_pretrained( or disable_name.lower() in model_types_all ) and ((dtype == torch.float16) or not SUPPORTS_BFLOAT16): os.environ["UNSLOTH_FORCE_FLOAT32"] = "1" - # Use bfloat16 storage where supported; fall back to float32 on - # older GPUs (e.g. T4) that lack native bfloat16 support. - dtype = torch.bfloat16 if SUPPORTS_BFLOAT16 else torch.float32 + dtype = torch.bfloat16 # Change to bfloat16 loading break # Apply gradient checkpointing with smart heuristics use_gradient_checkpointing = apply_unsloth_gradient_checkpointing( From 3400654afc95a26dee687b35e15ea229aa16a10d Mon Sep 17 00:00:00 2001 From: Roland Tannous Date: Mon, 6 Apr 2026 18:20:08 +0000 Subject: [PATCH 04/17] restrict trust_remote_code auto-enable to Nemotron models only --- studio/backend/core/inference/worker.py | 13 +++++-------- studio/backend/core/training/worker.py | 20 +++++++------------- 2 files changed, 12 insertions(+), 21 deletions(-) diff --git a/studio/backend/core/inference/worker.py b/studio/backend/core/inference/worker.py index 3c5363cb77..1010e56dac 100644 --- a/studio/backend/core/inference/worker.py +++ b/studio/backend/core/inference/worker.py @@ -322,19 +322,16 @@ def _handle_load(backend, config: dict, resp_queue: Any) -> None: except Exception as e: logger.warning("Could not read adapter_config.json: %s", e) - # Auto-enable trust_remote_code for unsloth/* transformers 5.x models - # (matches the training worker logic in core/training/worker.py) + # Auto-enable trust_remote_code for Nemotron models only. + # NemotronH has config parsing bugs requiring trust_remote_code=True. + # Other transformers 5.x models are native and do NOT need it. trust_remote_code = config.get("trust_remote_code", False) if not trust_remote_code: - from utils.transformers_version import needs_transformers_5 - model_name = config["model_name"] - if needs_transformers_5(model_name) and model_name.lower().startswith( - "unsloth/" - ): + if "nemotron" in model_name.lower(): trust_remote_code = True logger.info( - "Auto-enabled trust_remote_code for unsloth/* transformers 5.x model: %s", + "Auto-enabled trust_remote_code for Nemotron model: %s", model_name, ) diff --git a/studio/backend/core/training/worker.py b/studio/backend/core/training/worker.py index 9815e9d7b7..b412a05cc9 100644 --- a/studio/backend/core/training/worker.py +++ b/studio/backend/core/training/worker.py @@ -399,25 +399,19 @@ def run_training_process( ) return - # ── 1a. Auto-enable trust_remote_code for unsloth/* transformers 5.x models ── - # Some newer architectures (e.g. NemotronH) have config parsing bugs in - # transformers that require trust_remote_code=True as a workaround. - # Only auto-enable for models that genuinely need it (set in YAML defaults). - # Native transformers 5.x models (Qwen3.5, Gemma 4, etc.) do NOT need it - # and enabling it can bypass the compiler (disabling fused CE). - from utils.transformers_version import get_transformers_tier - + # ── 1a. Auto-enable trust_remote_code for Nemotron models ── + # NemotronH has config parsing bugs in transformers that require + # trust_remote_code=True as a workaround. Other transformers 5.x models + # (Qwen3.5, Gemma 4, etc.) are native and do NOT need it — enabling it + # bypasses the compiler (disabling fused CE). _lowered = model_name.lower() - _tier = get_transformers_tier(model_name) if ( - _tier != "default" - and _lowered.startswith("unsloth/") - and _tier not in ("530", "550") # Native t5 models don't need trust_remote_code + "nemotron" in _lowered and not config.get("trust_remote_code", False) ): config["trust_remote_code"] = True logger.info( - "Auto-enabled trust_remote_code for unsloth/* transformers 5.x model: %s", + "Auto-enabled trust_remote_code for Nemotron model: %s", model_name, ) From 41b103adee260fa944e5dbf76a5688361c2b8164 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Mon, 6 Apr 2026 19:09:39 +0000 Subject: [PATCH 05/17] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- studio/backend/core/training/worker.py | 5 +-- .../tests/test_transformers_version.py | 44 ++++++++++++------- studio/backend/utils/transformers_version.py | 12 ++--- 3 files changed, 37 insertions(+), 24 deletions(-) diff --git a/studio/backend/core/training/worker.py b/studio/backend/core/training/worker.py index b412a05cc9..54667dd7af 100644 --- a/studio/backend/core/training/worker.py +++ b/studio/backend/core/training/worker.py @@ -405,10 +405,7 @@ def run_training_process( # (Qwen3.5, Gemma 4, etc.) are native and do NOT need it — enabling it # bypasses the compiler (disabling fused CE). _lowered = model_name.lower() - if ( - "nemotron" in _lowered - and not config.get("trust_remote_code", False) - ): + if "nemotron" in _lowered and not config.get("trust_remote_code", False): config["trust_remote_code"] = True logger.info( "Auto-enabled trust_remote_code for Nemotron model: %s", diff --git a/studio/backend/tests/test_transformers_version.py b/studio/backend/tests/test_transformers_version.py index 387c1c6f13..c031c2fea3 100644 --- a/studio/backend/tests/test_transformers_version.py +++ b/studio/backend/tests/test_transformers_version.py @@ -206,7 +206,10 @@ def setup_method(self): def test_gemma4_architecture(self, tmp_path: Path): """config.json with Gemma4ForConditionalGeneration should return True.""" - cfg = {"architectures": ["Gemma4ForConditionalGeneration"], "model_type": "gemma4"} + cfg = { + "architectures": ["Gemma4ForConditionalGeneration"], + "model_type": "gemma4", + } (tmp_path / "config.json").write_text(json.dumps(cfg)) assert _check_config_needs_550(str(tmp_path)) is True @@ -272,7 +275,10 @@ def test_gemma4_alt_substring_returns_550(self): def test_gemma4_config_json_returns_550(self, tmp_path: Path): """Local checkpoint with Gemma4 architecture → 550.""" - cfg = {"architectures": ["Gemma4ForConditionalGeneration"], "model_type": "gemma4"} + cfg = { + "architectures": ["Gemma4ForConditionalGeneration"], + "model_type": "gemma4", + } (tmp_path / "config.json").write_text(json.dumps(cfg)) assert get_transformers_tier(str(tmp_path)) == "550" @@ -289,15 +295,20 @@ def test_ministral_returns_530(self): "utils.transformers_version._check_config_needs_550", return_value = False, ): - assert get_transformers_tier("mistralai/Ministral-3-8B-Instruct-2512") == "530" + assert ( + get_transformers_tier("mistralai/Ministral-3-8B-Instruct-2512") == "530" + ) def test_llama_returns_default(self): - with patch( - "utils.transformers_version._check_config_needs_550", - return_value = False, - ), patch( - "utils.transformers_version._check_tokenizer_config_needs_v5", - return_value = False, + with ( + patch( + "utils.transformers_version._check_config_needs_550", + return_value = False, + ), + patch( + "utils.transformers_version._check_tokenizer_config_needs_v5", + return_value = False, + ), ): assert get_transformers_tier("meta-llama/Llama-3-8B") == "default" @@ -314,11 +325,14 @@ def test_needs_transformers_5_compat(self): return_value = False, ): assert needs_transformers_5("Qwen/Qwen3.5-9B") is True - with patch( - "utils.transformers_version._check_config_needs_550", - return_value = False, - ), patch( - "utils.transformers_version._check_tokenizer_config_needs_v5", - return_value = False, + with ( + patch( + "utils.transformers_version._check_config_needs_550", + return_value = False, + ), + patch( + "utils.transformers_version._check_tokenizer_config_needs_v5", + return_value = False, + ), ): assert needs_transformers_5("meta-llama/Llama-3-8B") is False diff --git a/studio/backend/utils/transformers_version.py b/studio/backend/utils/transformers_version.py index a9a026895b..7ba20a366b 100644 --- a/studio/backend/utils/transformers_version.py +++ b/studio/backend/utils/transformers_version.py @@ -279,9 +279,7 @@ def _check_cfg(cfg: dict) -> bool: _config_needs_550_cache[model_name] = result return result except Exception as exc: - logger.debug( - "Could not fetch config.json for '%s': %s", model_name, exc - ) + logger.debug("Could not fetch config.json for '%s': %s", model_name, exc) _config_needs_550_cache[model_name] = False return False @@ -507,12 +505,16 @@ def _ensure_venv_dir(venv_dir: str, packages: tuple[str, ...], label: str) -> bo def _ensure_venv_t5_530_exists() -> bool: """Ensure .venv_t5_530/ exists with transformers 5.3.0.""" - return _ensure_venv_dir(_VENV_T5_530_DIR, _VENV_T5_530_PACKAGES, "transformers 5.3.0") + return _ensure_venv_dir( + _VENV_T5_530_DIR, _VENV_T5_530_PACKAGES, "transformers 5.3.0" + ) def _ensure_venv_t5_550_exists() -> bool: """Ensure .venv_t5_550/ exists with transformers 5.5.0.""" - return _ensure_venv_dir(_VENV_T5_550_DIR, _VENV_T5_550_PACKAGES, "transformers 5.5.0") + return _ensure_venv_dir( + _VENV_T5_550_DIR, _VENV_T5_550_PACKAGES, "transformers 5.5.0" + ) def _ensure_venv_t5_exists() -> bool: From fc49ae24531780a658049e6c238f146960f466b6 Mon Sep 17 00:00:00 2001 From: Roland Tannous Date: Mon, 6 Apr 2026 19:40:11 +0000 Subject: [PATCH 06/17] use config.json model_type for tier detection, add unsloth/nvidia namespace guard --- studio/backend/core/inference/worker.py | 6 +- studio/backend/core/training/worker.py | 6 +- .../tests/test_transformers_version.py | 25 +++- studio/backend/utils/transformers_version.py | 121 ++++++++++-------- 4 files changed, 99 insertions(+), 59 deletions(-) diff --git a/studio/backend/core/inference/worker.py b/studio/backend/core/inference/worker.py index 1010e56dac..506e631c51 100644 --- a/studio/backend/core/inference/worker.py +++ b/studio/backend/core/inference/worker.py @@ -328,7 +328,11 @@ def _handle_load(backend, config: dict, resp_queue: Any) -> None: trust_remote_code = config.get("trust_remote_code", False) if not trust_remote_code: model_name = config["model_name"] - if "nemotron" in model_name.lower(): + _mn_lower = model_name.lower() + if ( + "nemotron" in _mn_lower + and (_mn_lower.startswith("unsloth/") or _mn_lower.startswith("nvidia/")) + ): trust_remote_code = True logger.info( "Auto-enabled trust_remote_code for Nemotron model: %s", diff --git a/studio/backend/core/training/worker.py b/studio/backend/core/training/worker.py index 54667dd7af..fa05817697 100644 --- a/studio/backend/core/training/worker.py +++ b/studio/backend/core/training/worker.py @@ -405,7 +405,11 @@ def run_training_process( # (Qwen3.5, Gemma 4, etc.) are native and do NOT need it — enabling it # bypasses the compiler (disabling fused CE). _lowered = model_name.lower() - if "nemotron" in _lowered and not config.get("trust_remote_code", False): + if ( + "nemotron" in _lowered + and (_lowered.startswith("unsloth/") or _lowered.startswith("nvidia/")) + and not config.get("trust_remote_code", False) + ): config["trust_remote_code"] = True logger.info( "Auto-enabled trust_remote_code for Nemotron model: %s", diff --git a/studio/backend/tests/test_transformers_version.py b/studio/backend/tests/test_transformers_version.py index c031c2fea3..609a154a9a 100644 --- a/studio/backend/tests/test_transformers_version.py +++ b/studio/backend/tests/test_transformers_version.py @@ -32,8 +32,9 @@ _resolve_base_model, _check_tokenizer_config_needs_v5, _check_config_needs_550, + _get_config_json, _tokenizer_class_cache, - _config_needs_550_cache, + _config_json_cache, needs_transformers_5, get_transformers_tier, ) @@ -202,7 +203,7 @@ class TestCheckConfigNeeds550: """Tests for _check_config_needs_550() local config.json checks.""" def setup_method(self): - _config_needs_550_cache.clear() + _config_json_cache.clear() def test_gemma4_architecture(self, tmp_path: Path): """config.json with Gemma4ForConditionalGeneration should return True.""" @@ -242,8 +243,8 @@ def test_result_is_cached(self, tmp_path: Path): key = str(tmp_path) _check_config_needs_550(key) - assert key in _config_needs_550_cache - assert _config_needs_550_cache[key] is True + assert key in _config_json_cache + assert _config_json_cache[key] is not None def test_local_file_skips_network(self, tmp_path: Path): """When local config.json exists, no network request should be made.""" @@ -265,7 +266,7 @@ class TestGetTransformersTier: def setup_method(self): _tokenizer_class_cache.clear() - _config_needs_550_cache.clear() + _config_json_cache.clear() def test_gemma4_substring_returns_550(self): assert get_transformers_tier("google/gemma-4-E2B-it") == "550" @@ -317,6 +318,20 @@ def test_550_checked_before_530(self): # This shouldn't happen in practice, but verifies priority assert get_transformers_tier("gemma-4-model") == "550" + def test_config_json_model_type_530(self, tmp_path: Path): + """Local checkpoint with qwen3_moe model_type → 530.""" + cfg = {"model_type": "qwen3_moe", "architectures": ["Qwen3MoeForCausalLM"]} + (tmp_path / "config.json").write_text(json.dumps(cfg)) + + assert get_transformers_tier(str(tmp_path)) == "530" + + def test_config_json_model_type_glm4_moe(self, tmp_path: Path): + """Local checkpoint with glm4_moe model_type → 530.""" + cfg = {"model_type": "glm4_moe", "architectures": ["Glm4MoeForCausalLM"]} + (tmp_path / "config.json").write_text(json.dumps(cfg)) + + assert get_transformers_tier(str(tmp_path)) == "530" + def test_needs_transformers_5_compat(self): """needs_transformers_5 should return True for both 530 and 550 models.""" assert needs_transformers_5("google/gemma-4-E2B-it") is True diff --git a/studio/backend/utils/transformers_version.py b/studio/backend/utils/transformers_version.py index 7ba20a366b..308f416940 100644 --- a/studio/backend/utils/transformers_version.py +++ b/studio/backend/utils/transformers_version.py @@ -60,14 +60,25 @@ "gemma4", # Gemma-4 alternate naming ) -# Architecture classes / model_type values that require transformers 5.5.0. -# Checked via config.json (local or HuggingFace). +# Architecture classes that require transformers 5.5.0. _TRANSFORMERS_550_ARCHITECTURES: set[str] = { "Gemma4ForConditionalGeneration", } + +# model_type values (from config.json) → tier mapping. _TRANSFORMERS_550_MODEL_TYPES: set[str] = { "gemma4", } +_TRANSFORMERS_530_MODEL_TYPES: set[str] = { + "qwen3_moe", + "qwen3_5_moe", + "qwen3_vl_moe", + "qwen3_next", + "deepseek_v3_moe", + "glm4_moe", + "glm4_moe_lite", + "ministral", +} # Tokenizer classes that only exist in transformers>=5.x _TRANSFORMERS_5_TOKENIZER_CLASSES: set[str] = { @@ -77,8 +88,8 @@ # Cache for dynamic tokenizer_config.json lookups to avoid repeated fetches _tokenizer_class_cache: dict[str, bool] = {} -# Cache for dynamic config.json lookups (architecture/model_type checks) -_config_needs_550_cache: dict[str, bool] = {} +# Cache for config.json lookups (returns the parsed dict or None) +_config_json_cache: dict[str, dict | None] = {} # Versions TRANSFORMERS_550_VERSION = "5.5.0" @@ -219,43 +230,28 @@ def _check_tokenizer_config_needs_v5(model_name: str) -> bool: return False -def _check_config_needs_550(model_name: str) -> bool: - """Check ``config.json`` for architectures or model_type that require - transformers 5.5.0 (e.g. Gemma 4). +_SENTINEL = object() # distinguishes "not cached" from "cached as None" - Checks locally first, then falls back to fetching from HuggingFace. - Results are cached in ``_config_needs_550_cache``. - Returns False on any error (fail-open to lower tier). - """ - if model_name in _config_needs_550_cache: - return _config_needs_550_cache[model_name] - def _check_cfg(cfg: dict) -> bool: - archs = cfg.get("architectures", []) - if any(a in _TRANSFORMERS_550_ARCHITECTURES for a in archs): - return True - if cfg.get("model_type") in _TRANSFORMERS_550_MODEL_TYPES: - return True - return False +def _get_config_json(model_name: str) -> dict | None: + """Read and cache ``config.json`` for *model_name*. + + Checks local path first, then fetches from HuggingFace. + Returns the parsed dict, or ``None`` on any error (fail-open). + The result is cached in ``_config_json_cache``. + """ + cached = _config_json_cache.get(model_name, _SENTINEL) + if cached is not _SENTINEL: + return cached # --- Check local config.json first ------------------------------------ - local_path = Path(model_name) - local_cfg = local_path / "config.json" + local_cfg = Path(model_name) / "config.json" if local_cfg.is_file(): try: with open(local_cfg) as f: cfg = json.load(f) - result = _check_cfg(cfg) - if result: - logger.info( - "Local config.json check: %s needs transformers 5.5.0 " - "(architectures=%s, model_type=%s)", - model_name, - cfg.get("architectures", []), - cfg.get("model_type"), - ) - _config_needs_550_cache[model_name] = result - return result + _config_json_cache[model_name] = cfg + return cfg except Exception as exc: logger.debug("Could not read %s: %s", local_cfg, exc) @@ -267,21 +263,26 @@ def _check_cfg(cfg: dict) -> bool: req = urllib.request.Request(url, headers = {"User-Agent": "unsloth-studio"}) with urllib.request.urlopen(req, timeout = 10) as resp: cfg = json.loads(resp.read().decode()) - result = _check_cfg(cfg) - if result: - logger.info( - "Dynamic config.json check: %s needs transformers 5.5.0 " - "(architectures=%s, model_type=%s)", - model_name, - cfg.get("architectures", []), - cfg.get("model_type"), - ) - _config_needs_550_cache[model_name] = result - return result + _config_json_cache[model_name] = cfg + return cfg except Exception as exc: - logger.debug("Could not fetch config.json for '%s': %s", model_name, exc) - _config_needs_550_cache[model_name] = False + logger.debug( + "Could not fetch config.json for '%s': %s", model_name, exc + ) + _config_json_cache[model_name] = None + return None + + +def _check_config_needs_550(model_name: str) -> bool: + """Check ``config.json`` for architectures or model_type that require + transformers 5.5.0. Uses the shared ``_get_config_json`` cache.""" + cfg = _get_config_json(model_name) + if cfg is None: return False + archs = cfg.get("architectures", []) + if any(a in _TRANSFORMERS_550_ARCHITECTURES for a in archs): + return True + return cfg.get("model_type") in _TRANSFORMERS_550_MODEL_TYPES def get_transformers_tier(model_name: str) -> str: @@ -291,19 +292,35 @@ def get_transformers_tier(model_name: str) -> str: ``"530"`` for models needing transformers 5.3.0 (e.g. Ministral-3, Qwen3 MoE), or ``"default"`` for everything else (4.57.x). - The 5.5.0 check runs first, then 5.3.0. + Fast path: substring checks (no I/O) for both tiers run first. + Slow path: single config.json fetch (cached) checks model_type for + both tiers, then tokenizer_config.json as final fallback. """ lowered = model_name.lower() - # --- Check 5.5.0 first ------------------------------------------------ + # --- Fast substring checks (no I/O) ----------------------------------- if any(sub in lowered for sub in TRANSFORMERS_550_MODEL_SUBSTRINGS): return "550" - if _check_config_needs_550(model_name): - return "550" - - # --- Check 5.3.0 ------------------------------------------------------ if any(sub in lowered for sub in TRANSFORMERS_5_MODEL_SUBSTRINGS): return "530" + + # --- config.json model_type / architecture check (single fetch) ------- + cfg = _get_config_json(model_name) + if cfg is not None: + model_type = cfg.get("model_type", "") + archs = cfg.get("architectures", []) + + # Check 5.5.0 first + if model_type in _TRANSFORMERS_550_MODEL_TYPES: + return "550" + if any(a in _TRANSFORMERS_550_ARCHITECTURES for a in archs): + return "550" + + # Check 5.3.0 + if model_type in _TRANSFORMERS_530_MODEL_TYPES: + return "530" + + # --- Final fallback: tokenizer_config.json for 5.3.0 ------------------ if _check_tokenizer_config_needs_v5(model_name): return "530" From fb43d468e25379f28dd2477e6c24dd60cf55c099 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Mon, 6 Apr 2026 19:42:48 +0000 Subject: [PATCH 07/17] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- studio/backend/core/inference/worker.py | 5 ++--- studio/backend/utils/transformers_version.py | 4 +--- 2 files changed, 3 insertions(+), 6 deletions(-) diff --git a/studio/backend/core/inference/worker.py b/studio/backend/core/inference/worker.py index 506e631c51..db504bac3a 100644 --- a/studio/backend/core/inference/worker.py +++ b/studio/backend/core/inference/worker.py @@ -329,9 +329,8 @@ def _handle_load(backend, config: dict, resp_queue: Any) -> None: if not trust_remote_code: model_name = config["model_name"] _mn_lower = model_name.lower() - if ( - "nemotron" in _mn_lower - and (_mn_lower.startswith("unsloth/") or _mn_lower.startswith("nvidia/")) + if "nemotron" in _mn_lower and ( + _mn_lower.startswith("unsloth/") or _mn_lower.startswith("nvidia/") ): trust_remote_code = True logger.info( diff --git a/studio/backend/utils/transformers_version.py b/studio/backend/utils/transformers_version.py index 308f416940..643a0a827a 100644 --- a/studio/backend/utils/transformers_version.py +++ b/studio/backend/utils/transformers_version.py @@ -266,9 +266,7 @@ def _get_config_json(model_name: str) -> dict | None: _config_json_cache[model_name] = cfg return cfg except Exception as exc: - logger.debug( - "Could not fetch config.json for '%s': %s", model_name, exc - ) + logger.debug("Could not fetch config.json for '%s': %s", model_name, exc) _config_json_cache[model_name] = None return None From 81d2581d2d58fbe7c50aae7c47739a0c9ad07db4 Mon Sep 17 00:00:00 2001 From: Roland Tannous Date: Mon, 6 Apr 2026 20:26:32 +0000 Subject: [PATCH 08/17] Revert "[pre-commit.ci] auto fixes from pre-commit.com hooks" This reverts commit fb43d468e25379f28dd2477e6c24dd60cf55c099. --- studio/backend/core/inference/worker.py | 5 +++-- studio/backend/utils/transformers_version.py | 4 +++- 2 files changed, 6 insertions(+), 3 deletions(-) diff --git a/studio/backend/core/inference/worker.py b/studio/backend/core/inference/worker.py index db504bac3a..506e631c51 100644 --- a/studio/backend/core/inference/worker.py +++ b/studio/backend/core/inference/worker.py @@ -329,8 +329,9 @@ def _handle_load(backend, config: dict, resp_queue: Any) -> None: if not trust_remote_code: model_name = config["model_name"] _mn_lower = model_name.lower() - if "nemotron" in _mn_lower and ( - _mn_lower.startswith("unsloth/") or _mn_lower.startswith("nvidia/") + if ( + "nemotron" in _mn_lower + and (_mn_lower.startswith("unsloth/") or _mn_lower.startswith("nvidia/")) ): trust_remote_code = True logger.info( diff --git a/studio/backend/utils/transformers_version.py b/studio/backend/utils/transformers_version.py index 643a0a827a..308f416940 100644 --- a/studio/backend/utils/transformers_version.py +++ b/studio/backend/utils/transformers_version.py @@ -266,7 +266,9 @@ def _get_config_json(model_name: str) -> dict | None: _config_json_cache[model_name] = cfg return cfg except Exception as exc: - logger.debug("Could not fetch config.json for '%s': %s", model_name, exc) + logger.debug( + "Could not fetch config.json for '%s': %s", model_name, exc + ) _config_json_cache[model_name] = None return None From 4df9e68e079c28aea834714577676f796bd85177 Mon Sep 17 00:00:00 2001 From: Roland Tannous Date: Mon, 6 Apr 2026 20:26:32 +0000 Subject: [PATCH 09/17] Revert "use config.json model_type for tier detection, add unsloth/nvidia namespace guard" This reverts commit fc49ae24531780a658049e6c238f146960f466b6. --- studio/backend/core/inference/worker.py | 6 +- studio/backend/core/training/worker.py | 6 +- .../tests/test_transformers_version.py | 25 +--- studio/backend/utils/transformers_version.py | 121 ++++++++---------- 4 files changed, 59 insertions(+), 99 deletions(-) diff --git a/studio/backend/core/inference/worker.py b/studio/backend/core/inference/worker.py index 506e631c51..1010e56dac 100644 --- a/studio/backend/core/inference/worker.py +++ b/studio/backend/core/inference/worker.py @@ -328,11 +328,7 @@ def _handle_load(backend, config: dict, resp_queue: Any) -> None: trust_remote_code = config.get("trust_remote_code", False) if not trust_remote_code: model_name = config["model_name"] - _mn_lower = model_name.lower() - if ( - "nemotron" in _mn_lower - and (_mn_lower.startswith("unsloth/") or _mn_lower.startswith("nvidia/")) - ): + if "nemotron" in model_name.lower(): trust_remote_code = True logger.info( "Auto-enabled trust_remote_code for Nemotron model: %s", diff --git a/studio/backend/core/training/worker.py b/studio/backend/core/training/worker.py index fa05817697..54667dd7af 100644 --- a/studio/backend/core/training/worker.py +++ b/studio/backend/core/training/worker.py @@ -405,11 +405,7 @@ def run_training_process( # (Qwen3.5, Gemma 4, etc.) are native and do NOT need it — enabling it # bypasses the compiler (disabling fused CE). _lowered = model_name.lower() - if ( - "nemotron" in _lowered - and (_lowered.startswith("unsloth/") or _lowered.startswith("nvidia/")) - and not config.get("trust_remote_code", False) - ): + if "nemotron" in _lowered and not config.get("trust_remote_code", False): config["trust_remote_code"] = True logger.info( "Auto-enabled trust_remote_code for Nemotron model: %s", diff --git a/studio/backend/tests/test_transformers_version.py b/studio/backend/tests/test_transformers_version.py index 609a154a9a..c031c2fea3 100644 --- a/studio/backend/tests/test_transformers_version.py +++ b/studio/backend/tests/test_transformers_version.py @@ -32,9 +32,8 @@ _resolve_base_model, _check_tokenizer_config_needs_v5, _check_config_needs_550, - _get_config_json, _tokenizer_class_cache, - _config_json_cache, + _config_needs_550_cache, needs_transformers_5, get_transformers_tier, ) @@ -203,7 +202,7 @@ class TestCheckConfigNeeds550: """Tests for _check_config_needs_550() local config.json checks.""" def setup_method(self): - _config_json_cache.clear() + _config_needs_550_cache.clear() def test_gemma4_architecture(self, tmp_path: Path): """config.json with Gemma4ForConditionalGeneration should return True.""" @@ -243,8 +242,8 @@ def test_result_is_cached(self, tmp_path: Path): key = str(tmp_path) _check_config_needs_550(key) - assert key in _config_json_cache - assert _config_json_cache[key] is not None + assert key in _config_needs_550_cache + assert _config_needs_550_cache[key] is True def test_local_file_skips_network(self, tmp_path: Path): """When local config.json exists, no network request should be made.""" @@ -266,7 +265,7 @@ class TestGetTransformersTier: def setup_method(self): _tokenizer_class_cache.clear() - _config_json_cache.clear() + _config_needs_550_cache.clear() def test_gemma4_substring_returns_550(self): assert get_transformers_tier("google/gemma-4-E2B-it") == "550" @@ -318,20 +317,6 @@ def test_550_checked_before_530(self): # This shouldn't happen in practice, but verifies priority assert get_transformers_tier("gemma-4-model") == "550" - def test_config_json_model_type_530(self, tmp_path: Path): - """Local checkpoint with qwen3_moe model_type → 530.""" - cfg = {"model_type": "qwen3_moe", "architectures": ["Qwen3MoeForCausalLM"]} - (tmp_path / "config.json").write_text(json.dumps(cfg)) - - assert get_transformers_tier(str(tmp_path)) == "530" - - def test_config_json_model_type_glm4_moe(self, tmp_path: Path): - """Local checkpoint with glm4_moe model_type → 530.""" - cfg = {"model_type": "glm4_moe", "architectures": ["Glm4MoeForCausalLM"]} - (tmp_path / "config.json").write_text(json.dumps(cfg)) - - assert get_transformers_tier(str(tmp_path)) == "530" - def test_needs_transformers_5_compat(self): """needs_transformers_5 should return True for both 530 and 550 models.""" assert needs_transformers_5("google/gemma-4-E2B-it") is True diff --git a/studio/backend/utils/transformers_version.py b/studio/backend/utils/transformers_version.py index 308f416940..7ba20a366b 100644 --- a/studio/backend/utils/transformers_version.py +++ b/studio/backend/utils/transformers_version.py @@ -60,25 +60,14 @@ "gemma4", # Gemma-4 alternate naming ) -# Architecture classes that require transformers 5.5.0. +# Architecture classes / model_type values that require transformers 5.5.0. +# Checked via config.json (local or HuggingFace). _TRANSFORMERS_550_ARCHITECTURES: set[str] = { "Gemma4ForConditionalGeneration", } - -# model_type values (from config.json) → tier mapping. _TRANSFORMERS_550_MODEL_TYPES: set[str] = { "gemma4", } -_TRANSFORMERS_530_MODEL_TYPES: set[str] = { - "qwen3_moe", - "qwen3_5_moe", - "qwen3_vl_moe", - "qwen3_next", - "deepseek_v3_moe", - "glm4_moe", - "glm4_moe_lite", - "ministral", -} # Tokenizer classes that only exist in transformers>=5.x _TRANSFORMERS_5_TOKENIZER_CLASSES: set[str] = { @@ -88,8 +77,8 @@ # Cache for dynamic tokenizer_config.json lookups to avoid repeated fetches _tokenizer_class_cache: dict[str, bool] = {} -# Cache for config.json lookups (returns the parsed dict or None) -_config_json_cache: dict[str, dict | None] = {} +# Cache for dynamic config.json lookups (architecture/model_type checks) +_config_needs_550_cache: dict[str, bool] = {} # Versions TRANSFORMERS_550_VERSION = "5.5.0" @@ -230,28 +219,43 @@ def _check_tokenizer_config_needs_v5(model_name: str) -> bool: return False -_SENTINEL = object() # distinguishes "not cached" from "cached as None" - - -def _get_config_json(model_name: str) -> dict | None: - """Read and cache ``config.json`` for *model_name*. +def _check_config_needs_550(model_name: str) -> bool: + """Check ``config.json`` for architectures or model_type that require + transformers 5.5.0 (e.g. Gemma 4). - Checks local path first, then fetches from HuggingFace. - Returns the parsed dict, or ``None`` on any error (fail-open). - The result is cached in ``_config_json_cache``. + Checks locally first, then falls back to fetching from HuggingFace. + Results are cached in ``_config_needs_550_cache``. + Returns False on any error (fail-open to lower tier). """ - cached = _config_json_cache.get(model_name, _SENTINEL) - if cached is not _SENTINEL: - return cached + if model_name in _config_needs_550_cache: + return _config_needs_550_cache[model_name] + + def _check_cfg(cfg: dict) -> bool: + archs = cfg.get("architectures", []) + if any(a in _TRANSFORMERS_550_ARCHITECTURES for a in archs): + return True + if cfg.get("model_type") in _TRANSFORMERS_550_MODEL_TYPES: + return True + return False # --- Check local config.json first ------------------------------------ - local_cfg = Path(model_name) / "config.json" + local_path = Path(model_name) + local_cfg = local_path / "config.json" if local_cfg.is_file(): try: with open(local_cfg) as f: cfg = json.load(f) - _config_json_cache[model_name] = cfg - return cfg + result = _check_cfg(cfg) + if result: + logger.info( + "Local config.json check: %s needs transformers 5.5.0 " + "(architectures=%s, model_type=%s)", + model_name, + cfg.get("architectures", []), + cfg.get("model_type"), + ) + _config_needs_550_cache[model_name] = result + return result except Exception as exc: logger.debug("Could not read %s: %s", local_cfg, exc) @@ -263,26 +267,21 @@ def _get_config_json(model_name: str) -> dict | None: req = urllib.request.Request(url, headers = {"User-Agent": "unsloth-studio"}) with urllib.request.urlopen(req, timeout = 10) as resp: cfg = json.loads(resp.read().decode()) - _config_json_cache[model_name] = cfg - return cfg + result = _check_cfg(cfg) + if result: + logger.info( + "Dynamic config.json check: %s needs transformers 5.5.0 " + "(architectures=%s, model_type=%s)", + model_name, + cfg.get("architectures", []), + cfg.get("model_type"), + ) + _config_needs_550_cache[model_name] = result + return result except Exception as exc: - logger.debug( - "Could not fetch config.json for '%s': %s", model_name, exc - ) - _config_json_cache[model_name] = None - return None - - -def _check_config_needs_550(model_name: str) -> bool: - """Check ``config.json`` for architectures or model_type that require - transformers 5.5.0. Uses the shared ``_get_config_json`` cache.""" - cfg = _get_config_json(model_name) - if cfg is None: + logger.debug("Could not fetch config.json for '%s': %s", model_name, exc) + _config_needs_550_cache[model_name] = False return False - archs = cfg.get("architectures", []) - if any(a in _TRANSFORMERS_550_ARCHITECTURES for a in archs): - return True - return cfg.get("model_type") in _TRANSFORMERS_550_MODEL_TYPES def get_transformers_tier(model_name: str) -> str: @@ -292,35 +291,19 @@ def get_transformers_tier(model_name: str) -> str: ``"530"`` for models needing transformers 5.3.0 (e.g. Ministral-3, Qwen3 MoE), or ``"default"`` for everything else (4.57.x). - Fast path: substring checks (no I/O) for both tiers run first. - Slow path: single config.json fetch (cached) checks model_type for - both tiers, then tokenizer_config.json as final fallback. + The 5.5.0 check runs first, then 5.3.0. """ lowered = model_name.lower() - # --- Fast substring checks (no I/O) ----------------------------------- + # --- Check 5.5.0 first ------------------------------------------------ if any(sub in lowered for sub in TRANSFORMERS_550_MODEL_SUBSTRINGS): return "550" + if _check_config_needs_550(model_name): + return "550" + + # --- Check 5.3.0 ------------------------------------------------------ if any(sub in lowered for sub in TRANSFORMERS_5_MODEL_SUBSTRINGS): return "530" - - # --- config.json model_type / architecture check (single fetch) ------- - cfg = _get_config_json(model_name) - if cfg is not None: - model_type = cfg.get("model_type", "") - archs = cfg.get("architectures", []) - - # Check 5.5.0 first - if model_type in _TRANSFORMERS_550_MODEL_TYPES: - return "550" - if any(a in _TRANSFORMERS_550_ARCHITECTURES for a in archs): - return "550" - - # Check 5.3.0 - if model_type in _TRANSFORMERS_530_MODEL_TYPES: - return "530" - - # --- Final fallback: tokenizer_config.json for 5.3.0 ------------------ if _check_tokenizer_config_needs_v5(model_name): return "530" From 96dd82fdf5c8cf1cdbbb929f3815aee42906bb54 Mon Sep 17 00:00:00 2001 From: Roland Tannous Date: Mon, 6 Apr 2026 20:27:15 +0000 Subject: [PATCH 10/17] add unsloth/nvidia namespace guard to Nemotron trust_remote_code auto-enable --- studio/backend/core/inference/worker.py | 6 +++++- studio/backend/core/training/worker.py | 6 +++++- 2 files changed, 10 insertions(+), 2 deletions(-) diff --git a/studio/backend/core/inference/worker.py b/studio/backend/core/inference/worker.py index 1010e56dac..506e631c51 100644 --- a/studio/backend/core/inference/worker.py +++ b/studio/backend/core/inference/worker.py @@ -328,7 +328,11 @@ def _handle_load(backend, config: dict, resp_queue: Any) -> None: trust_remote_code = config.get("trust_remote_code", False) if not trust_remote_code: model_name = config["model_name"] - if "nemotron" in model_name.lower(): + _mn_lower = model_name.lower() + if ( + "nemotron" in _mn_lower + and (_mn_lower.startswith("unsloth/") or _mn_lower.startswith("nvidia/")) + ): trust_remote_code = True logger.info( "Auto-enabled trust_remote_code for Nemotron model: %s", diff --git a/studio/backend/core/training/worker.py b/studio/backend/core/training/worker.py index 54667dd7af..fa05817697 100644 --- a/studio/backend/core/training/worker.py +++ b/studio/backend/core/training/worker.py @@ -405,7 +405,11 @@ def run_training_process( # (Qwen3.5, Gemma 4, etc.) are native and do NOT need it — enabling it # bypasses the compiler (disabling fused CE). _lowered = model_name.lower() - if "nemotron" in _lowered and not config.get("trust_remote_code", False): + if ( + "nemotron" in _lowered + and (_lowered.startswith("unsloth/") or _lowered.startswith("nvidia/")) + and not config.get("trust_remote_code", False) + ): config["trust_remote_code"] = True logger.info( "Auto-enabled trust_remote_code for Nemotron model: %s", From 35226e67f90825f443a6e4030236a6308c66ac9f Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Mon, 6 Apr 2026 20:27:25 +0000 Subject: [PATCH 11/17] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- studio/backend/core/inference/worker.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/studio/backend/core/inference/worker.py b/studio/backend/core/inference/worker.py index 506e631c51..db504bac3a 100644 --- a/studio/backend/core/inference/worker.py +++ b/studio/backend/core/inference/worker.py @@ -329,9 +329,8 @@ def _handle_load(backend, config: dict, resp_queue: Any) -> None: if not trust_remote_code: model_name = config["model_name"] _mn_lower = model_name.lower() - if ( - "nemotron" in _mn_lower - and (_mn_lower.startswith("unsloth/") or _mn_lower.startswith("nvidia/")) + if "nemotron" in _mn_lower and ( + _mn_lower.startswith("unsloth/") or _mn_lower.startswith("nvidia/") ): trust_remote_code = True logger.info( From 82dbf70910c37593e11d49c9c62dc2447412ca3c Mon Sep 17 00:00:00 2001 From: Roland Tannous Date: Mon, 6 Apr 2026 20:31:06 +0000 Subject: [PATCH 12/17] reorder tier checks: all substring matches before config.json fetches --- studio/backend/utils/transformers_version.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/studio/backend/utils/transformers_version.py b/studio/backend/utils/transformers_version.py index 7ba20a366b..8403d9cc2f 100644 --- a/studio/backend/utils/transformers_version.py +++ b/studio/backend/utils/transformers_version.py @@ -295,15 +295,15 @@ def get_transformers_tier(model_name: str) -> str: """ lowered = model_name.lower() - # --- Check 5.5.0 first ------------------------------------------------ + # --- Fast substring checks (no I/O) ------------------------------------ if any(sub in lowered for sub in TRANSFORMERS_550_MODEL_SUBSTRINGS): return "550" - if _check_config_needs_550(model_name): - return "550" - - # --- Check 5.3.0 ------------------------------------------------------ if any(sub in lowered for sub in TRANSFORMERS_5_MODEL_SUBSTRINGS): return "530" + + # --- Slow config fallbacks (local file first, then network) ----------- + if _check_config_needs_550(model_name): + return "550" if _check_tokenizer_config_needs_v5(model_name): return "530" From 30bc604cadcd830dcd89d3ebac620a2600199c0e Mon Sep 17 00:00:00 2001 From: Roland Tannous Date: Mon, 6 Apr 2026 22:35:50 +0000 Subject: [PATCH 13/17] extract shared activate_transformers_for_subprocess into transformers_version.py --- studio/backend/core/export/worker.py | 41 ++----------------- studio/backend/core/inference/worker.py | 41 ++----------------- studio/backend/core/training/worker.py | 41 ++----------------- studio/backend/utils/transformers_version.py | 43 ++++++++++++++++++++ 4 files changed, 52 insertions(+), 114 deletions(-) diff --git a/studio/backend/core/export/worker.py b/studio/backend/core/export/worker.py index 4bdfd13b78..c38c925476 100644 --- a/studio/backend/core/export/worker.py +++ b/studio/backend/core/export/worker.py @@ -30,50 +30,15 @@ def _activate_transformers_version(model_name: str) -> None: - """Activate the correct transformers version BEFORE any ML imports. - - Uses get_transformers_tier() to decide between .venv_t5_550/ (5.5.0), - .venv_t5_530/ (5.3.0), or the default 4.57.x. - """ + """Activate the correct transformers version BEFORE any ML imports.""" # Ensure backend is on path for utils imports backend_path = str(Path(__file__).resolve().parent.parent.parent) if backend_path not in sys.path: sys.path.insert(0, backend_path) - from utils.transformers_version import ( - get_transformers_tier, - _resolve_base_model, - _ensure_venv_t5_530_exists, - _ensure_venv_t5_550_exists, - _VENV_T5_530_DIR, - _VENV_T5_550_DIR, - ) - - resolved = _resolve_base_model(model_name) - tier = get_transformers_tier(resolved) + from utils.transformers_version import activate_transformers_for_subprocess - if tier == "550": - if not _ensure_venv_t5_550_exists(): - raise RuntimeError( - f"Cannot activate transformers 5.5.0: .venv_t5_550 missing at {_VENV_T5_550_DIR}" - ) - if _VENV_T5_550_DIR not in sys.path: - sys.path.insert(0, _VENV_T5_550_DIR) - logger.info("Activated transformers 5.5.0 from %s", _VENV_T5_550_DIR) - _pp = os.environ.get("PYTHONPATH", "") - os.environ["PYTHONPATH"] = _VENV_T5_550_DIR + (os.pathsep + _pp if _pp else "") - elif tier == "530": - if not _ensure_venv_t5_530_exists(): - raise RuntimeError( - f"Cannot activate transformers 5.3.0: .venv_t5_530 missing at {_VENV_T5_530_DIR}" - ) - if _VENV_T5_530_DIR not in sys.path: - sys.path.insert(0, _VENV_T5_530_DIR) - logger.info("Activated transformers 5.3.0 from %s", _VENV_T5_530_DIR) - _pp = os.environ.get("PYTHONPATH", "") - os.environ["PYTHONPATH"] = _VENV_T5_530_DIR + (os.pathsep + _pp if _pp else "") - else: - logger.info("Using default transformers (4.57.x) for %s", model_name) + activate_transformers_for_subprocess(model_name) def _send_response(resp_queue: Any, response: dict) -> None: diff --git a/studio/backend/core/inference/worker.py b/studio/backend/core/inference/worker.py index db504bac3a..85293162f5 100644 --- a/studio/backend/core/inference/worker.py +++ b/studio/backend/core/inference/worker.py @@ -34,50 +34,15 @@ def _activate_transformers_version(model_name: str) -> None: - """Activate the correct transformers version BEFORE any ML imports. - - Uses get_transformers_tier() to decide between .venv_t5_550/ (5.5.0), - .venv_t5_530/ (5.3.0), or the default 4.57.x. - """ + """Activate the correct transformers version BEFORE any ML imports.""" # Ensure backend is on path for utils imports backend_path = str(Path(__file__).resolve().parent.parent.parent) if backend_path not in sys.path: sys.path.insert(0, backend_path) - from utils.transformers_version import ( - get_transformers_tier, - _resolve_base_model, - _ensure_venv_t5_530_exists, - _ensure_venv_t5_550_exists, - _VENV_T5_530_DIR, - _VENV_T5_550_DIR, - ) - - resolved = _resolve_base_model(model_name) - tier = get_transformers_tier(resolved) + from utils.transformers_version import activate_transformers_for_subprocess - if tier == "550": - if not _ensure_venv_t5_550_exists(): - raise RuntimeError( - f"Cannot activate transformers 5.5.0: .venv_t5_550 missing at {_VENV_T5_550_DIR}" - ) - if _VENV_T5_550_DIR not in sys.path: - sys.path.insert(0, _VENV_T5_550_DIR) - logger.info("Activated transformers 5.5.0 from %s", _VENV_T5_550_DIR) - _pp = os.environ.get("PYTHONPATH", "") - os.environ["PYTHONPATH"] = _VENV_T5_550_DIR + (os.pathsep + _pp if _pp else "") - elif tier == "530": - if not _ensure_venv_t5_530_exists(): - raise RuntimeError( - f"Cannot activate transformers 5.3.0: .venv_t5_530 missing at {_VENV_T5_530_DIR}" - ) - if _VENV_T5_530_DIR not in sys.path: - sys.path.insert(0, _VENV_T5_530_DIR) - logger.info("Activated transformers 5.3.0 from %s", _VENV_T5_530_DIR) - _pp = os.environ.get("PYTHONPATH", "") - os.environ["PYTHONPATH"] = _VENV_T5_530_DIR + (os.pathsep + _pp if _pp else "") - else: - logger.info("Using default transformers (4.57.x) for %s", model_name) + activate_transformers_for_subprocess(model_name) def _decode_image(image_base64: str): diff --git a/studio/backend/core/training/worker.py b/studio/backend/core/training/worker.py index fa05817697..bfff599001 100644 --- a/studio/backend/core/training/worker.py +++ b/studio/backend/core/training/worker.py @@ -306,50 +306,15 @@ def _ensure_mamba_ssm(event_queue: Any, model_name: str) -> None: def _activate_transformers_version(model_name: str) -> None: - """Activate the correct transformers version BEFORE any ML imports. - - Uses get_transformers_tier() to decide between .venv_t5_550/ (5.5.0), - .venv_t5_530/ (5.3.0), or the default 4.57.x. - """ + """Activate the correct transformers version BEFORE any ML imports.""" # Ensure backend is on path for utils imports backend_path = str(Path(__file__).resolve().parent.parent.parent) if backend_path not in sys.path: sys.path.insert(0, backend_path) - from utils.transformers_version import ( - get_transformers_tier, - _resolve_base_model, - _ensure_venv_t5_530_exists, - _ensure_venv_t5_550_exists, - _VENV_T5_530_DIR, - _VENV_T5_550_DIR, - ) - - resolved = _resolve_base_model(model_name) - tier = get_transformers_tier(resolved) + from utils.transformers_version import activate_transformers_for_subprocess - if tier == "550": - if not _ensure_venv_t5_550_exists(): - raise RuntimeError( - f"Cannot activate transformers 5.5.0: .venv_t5_550 missing at {_VENV_T5_550_DIR}" - ) - if _VENV_T5_550_DIR not in sys.path: - sys.path.insert(0, _VENV_T5_550_DIR) - logger.info("Activated transformers 5.5.0 from %s", _VENV_T5_550_DIR) - _pp = os.environ.get("PYTHONPATH", "") - os.environ["PYTHONPATH"] = _VENV_T5_550_DIR + (os.pathsep + _pp if _pp else "") - elif tier == "530": - if not _ensure_venv_t5_530_exists(): - raise RuntimeError( - f"Cannot activate transformers 5.3.0: .venv_t5_530 missing at {_VENV_T5_530_DIR}" - ) - if _VENV_T5_530_DIR not in sys.path: - sys.path.insert(0, _VENV_T5_530_DIR) - logger.info("Activated transformers 5.3.0 from %s", _VENV_T5_530_DIR) - _pp = os.environ.get("PYTHONPATH", "") - os.environ["PYTHONPATH"] = _VENV_T5_530_DIR + (os.pathsep + _pp if _pp else "") - else: - logger.info("Using default transformers (4.57.x) for %s", model_name) + activate_transformers_for_subprocess(model_name) def run_training_process( diff --git a/studio/backend/utils/transformers_version.py b/studio/backend/utils/transformers_version.py index 8403d9cc2f..0517a5f378 100644 --- a/studio/backend/utils/transformers_version.py +++ b/studio/backend/utils/transformers_version.py @@ -94,6 +94,49 @@ _VENV_T5_DIR = _VENV_T5_550_DIR +def activate_transformers_for_subprocess(model_name: str) -> None: + """Activate the correct transformers version in a subprocess worker. + + Call this BEFORE any ML imports. Resolves LoRA adapters to their base + model, determines the required tier, and prepends the appropriate + ``.venv_t5_*`` directory to ``sys.path``. Also propagates the path + via ``PYTHONPATH`` for child processes (e.g. GGUF converter). + + Used by training, inference, and export workers. + """ + resolved = _resolve_base_model(model_name) + tier = get_transformers_tier(resolved) + + if tier == "550": + if not _ensure_venv_t5_550_exists(): + raise RuntimeError( + f"Cannot activate transformers 5.5.0: " + f".venv_t5_550 missing at {_VENV_T5_550_DIR}" + ) + if _VENV_T5_550_DIR not in sys.path: + sys.path.insert(0, _VENV_T5_550_DIR) + logger.info("Activated transformers 5.5.0 from %s", _VENV_T5_550_DIR) + _pp = os.environ.get("PYTHONPATH", "") + os.environ["PYTHONPATH"] = ( + _VENV_T5_550_DIR + (os.pathsep + _pp if _pp else "") + ) + elif tier == "530": + if not _ensure_venv_t5_530_exists(): + raise RuntimeError( + f"Cannot activate transformers 5.3.0: " + f".venv_t5_530 missing at {_VENV_T5_530_DIR}" + ) + if _VENV_T5_530_DIR not in sys.path: + sys.path.insert(0, _VENV_T5_530_DIR) + logger.info("Activated transformers 5.3.0 from %s", _VENV_T5_530_DIR) + _pp = os.environ.get("PYTHONPATH", "") + os.environ["PYTHONPATH"] = ( + _VENV_T5_530_DIR + (os.pathsep + _pp if _pp else "") + ) + else: + logger.info("Using default transformers (4.57.x) for %s", model_name) + + def _resolve_base_model(model_name: str) -> str: """If *model_name* points to a LoRA adapter, return its base model. From 18db38b0a275ee018b8cd05a074f1de41a53efa1 Mon Sep 17 00:00:00 2001 From: Roland Tannous Date: Mon, 6 Apr 2026 22:45:22 +0000 Subject: [PATCH 14/17] narrow Nemotron trust_remote_code to nemotron_h/nemotron-3-nano, add to export worker --- studio/backend/core/export/worker.py | 13 +++++++++++++ studio/backend/core/inference/worker.py | 6 ++++-- studio/backend/core/training/worker.py | 6 ++++-- 3 files changed, 21 insertions(+), 4 deletions(-) diff --git a/studio/backend/core/export/worker.py b/studio/backend/core/export/worker.py index c38c925476..3f3dc955fa 100644 --- a/studio/backend/core/export/worker.py +++ b/studio/backend/core/export/worker.py @@ -56,6 +56,19 @@ def _handle_load(backend, cmd: dict, resp_queue: Any) -> None: load_in_4bit = cmd.get("load_in_4bit", True) trust_remote_code = cmd.get("trust_remote_code", False) + # Auto-enable trust_remote_code for NemotronH/Nano models. + if not trust_remote_code: + _NEMOTRON_TRUST_SUBSTRINGS = ("nemotron_h", "nemotron-h", "nemotron-3-nano") + _cp_lower = checkpoint_path.lower() + if any(sub in _cp_lower for sub in _NEMOTRON_TRUST_SUBSTRINGS) and ( + _cp_lower.startswith("unsloth/") or _cp_lower.startswith("nvidia/") + ): + trust_remote_code = True + logger.info( + "Auto-enabled trust_remote_code for Nemotron model: %s", + checkpoint_path, + ) + try: _send_response( resp_queue, diff --git a/studio/backend/core/inference/worker.py b/studio/backend/core/inference/worker.py index 85293162f5..fbcce276ba 100644 --- a/studio/backend/core/inference/worker.py +++ b/studio/backend/core/inference/worker.py @@ -287,14 +287,16 @@ def _handle_load(backend, config: dict, resp_queue: Any) -> None: except Exception as e: logger.warning("Could not read adapter_config.json: %s", e) - # Auto-enable trust_remote_code for Nemotron models only. + # Auto-enable trust_remote_code for NemotronH/Nano models only. # NemotronH has config parsing bugs requiring trust_remote_code=True. # Other transformers 5.x models are native and do NOT need it. + # NOTE: Must NOT match Llama-Nemotron (standard Llama architecture). + _NEMOTRON_TRUST_SUBSTRINGS = ("nemotron_h", "nemotron-h", "nemotron-3-nano") trust_remote_code = config.get("trust_remote_code", False) if not trust_remote_code: model_name = config["model_name"] _mn_lower = model_name.lower() - if "nemotron" in _mn_lower and ( + if any(sub in _mn_lower for sub in _NEMOTRON_TRUST_SUBSTRINGS) and ( _mn_lower.startswith("unsloth/") or _mn_lower.startswith("nvidia/") ): trust_remote_code = True diff --git a/studio/backend/core/training/worker.py b/studio/backend/core/training/worker.py index bfff599001..a461972eca 100644 --- a/studio/backend/core/training/worker.py +++ b/studio/backend/core/training/worker.py @@ -364,14 +364,16 @@ def run_training_process( ) return - # ── 1a. Auto-enable trust_remote_code for Nemotron models ── + # ── 1a. Auto-enable trust_remote_code for NemotronH/Nano models ── # NemotronH has config parsing bugs in transformers that require # trust_remote_code=True as a workaround. Other transformers 5.x models # (Qwen3.5, Gemma 4, etc.) are native and do NOT need it — enabling it # bypasses the compiler (disabling fused CE). + # NOTE: Must NOT match Llama-Nemotron (standard Llama architecture). + _NEMOTRON_TRUST_SUBSTRINGS = ("nemotron_h", "nemotron-h", "nemotron-3-nano") _lowered = model_name.lower() if ( - "nemotron" in _lowered + any(sub in _lowered for sub in _NEMOTRON_TRUST_SUBSTRINGS) and (_lowered.startswith("unsloth/") or _lowered.startswith("nvidia/")) and not config.get("trust_remote_code", False) ): From 1cc14f41d1a6e989dbdb74665a93a496c81662c6 Mon Sep 17 00:00:00 2001 From: Roland Tannous Date: Mon, 6 Apr 2026 22:46:14 +0000 Subject: [PATCH 15/17] clean venv_t5 dirs before re-install in setup.sh, clarify version alias comment --- studio/backend/utils/transformers_version.py | 3 ++- studio/setup.sh | 2 ++ 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/studio/backend/utils/transformers_version.py b/studio/backend/utils/transformers_version.py index 0517a5f378..62176dc9b4 100644 --- a/studio/backend/utils/transformers_version.py +++ b/studio/backend/utils/transformers_version.py @@ -84,7 +84,8 @@ TRANSFORMERS_550_VERSION = "5.5.0" TRANSFORMERS_530_VERSION = "5.3.0" TRANSFORMERS_DEFAULT_VERSION = "4.57.6" -# Backwards-compat alias used by other modules +# Backwards-compat alias — points to 5.5.0 (the highest 5.x tier). +# Consumers should prefer TRANSFORMERS_530_VERSION / TRANSFORMERS_550_VERSION. TRANSFORMERS_5_VERSION = TRANSFORMERS_550_VERSION # Pre-installed directories — created by setup.sh / setup.ps1 diff --git a/studio/setup.sh b/studio/setup.sh index 72df82f601..3040569f54 100755 --- a/studio/setup.sh +++ b/studio/setup.sh @@ -514,6 +514,7 @@ if [ "$_SKIP_PYTHON_DEPS" = false ]; then # Clean up legacy single .venv_t5 directory [ -d "$STUDIO_HOME/.venv_t5" ] && rm -rf "$STUDIO_HOME/.venv_t5" + [ -d "$VENV_T5_530_DIR" ] && rm -rf "$VENV_T5_530_DIR" mkdir -p "$VENV_T5_530_DIR" run_quiet "install transformers 5.3.0" fast_install --target "$VENV_T5_530_DIR" --no-deps "transformers==5.3.0" run_quiet "install huggingface_hub for t5_530" fast_install --target "$VENV_T5_530_DIR" --no-deps "huggingface_hub==1.8.0" @@ -521,6 +522,7 @@ if [ "$_SKIP_PYTHON_DEPS" = false ]; then run_quiet "install tiktoken for t5_530" fast_install --target "$VENV_T5_530_DIR" "tiktoken" step "transformers" "5.3.0 pre-installed" + [ -d "$VENV_T5_550_DIR" ] && rm -rf "$VENV_T5_550_DIR" mkdir -p "$VENV_T5_550_DIR" run_quiet "install transformers 5.5.0" fast_install --target "$VENV_T5_550_DIR" --no-deps "transformers==5.5.0" run_quiet "install huggingface_hub for t5_550" fast_install --target "$VENV_T5_550_DIR" --no-deps "huggingface_hub==1.8.0" From 5445fa6e16e9233179480bf19c5fd7fdc3459938 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Mon, 6 Apr 2026 22:46:58 +0000 Subject: [PATCH 16/17] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- studio/backend/utils/transformers_version.py | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/studio/backend/utils/transformers_version.py b/studio/backend/utils/transformers_version.py index 62176dc9b4..0c13b5455b 100644 --- a/studio/backend/utils/transformers_version.py +++ b/studio/backend/utils/transformers_version.py @@ -118,9 +118,7 @@ def activate_transformers_for_subprocess(model_name: str) -> None: sys.path.insert(0, _VENV_T5_550_DIR) logger.info("Activated transformers 5.5.0 from %s", _VENV_T5_550_DIR) _pp = os.environ.get("PYTHONPATH", "") - os.environ["PYTHONPATH"] = ( - _VENV_T5_550_DIR + (os.pathsep + _pp if _pp else "") - ) + os.environ["PYTHONPATH"] = _VENV_T5_550_DIR + (os.pathsep + _pp if _pp else "") elif tier == "530": if not _ensure_venv_t5_530_exists(): raise RuntimeError( @@ -131,9 +129,7 @@ def activate_transformers_for_subprocess(model_name: str) -> None: sys.path.insert(0, _VENV_T5_530_DIR) logger.info("Activated transformers 5.3.0 from %s", _VENV_T5_530_DIR) _pp = os.environ.get("PYTHONPATH", "") - os.environ["PYTHONPATH"] = ( - _VENV_T5_530_DIR + (os.pathsep + _pp if _pp else "") - ) + os.environ["PYTHONPATH"] = _VENV_T5_530_DIR + (os.pathsep + _pp if _pp else "") else: logger.info("Using default transformers (4.57.x) for %s", model_name) From 45e3a02cc5cbaa7871a5588a84a27191a0ff2eb0 Mon Sep 17 00:00:00 2001 From: Roland Tannous Date: Tue, 7 Apr 2026 07:16:26 +0000 Subject: [PATCH 17/17] run venv_t5 migration outside deps fast-path gate in both setup scripts --- studio/setup.ps1 | 39 ++++++++++++++++++++++++--------------- studio/setup.sh | 34 +++++++++++++++++++++++----------- 2 files changed, 47 insertions(+), 26 deletions(-) diff --git a/studio/setup.ps1 b/studio/setup.ps1 index fa853cdadf..c3a8cd71ca 100644 --- a/studio/setup.ps1 +++ b/studio/setup.ps1 @@ -1579,23 +1579,37 @@ if ($stackExit -ne 0) { exit 1 } -# ── Pre-install transformers 5.x into .venv_t5_530/ and .venv_t5_550/ ── -# Models like GLM-4.7-Flash, Qwen3 MoE need transformers>=5.3.0. -# Gemma 4 models need transformers>=5.5.0. -# Pre-install into separate directories to avoid runtime pip overhead. -# The training subprocess prepends the appropriate dir to sys.path. -Write-Host "" +} else { + step "python" "dependencies up to date" + # Restore ErrorActionPreference (was lowered for pip/python section) + $ErrorActionPreference = $prevEAP +} -# Clean up legacy single .venv_t5 directory +# ── Pre-install transformers 5.x into .venv_t5_530/ and .venv_t5_550/ ── +# Runs outside the deps fast-path gate so that upgrades from the legacy +# single .venv_t5 are always migrated to the tiered layout. +$VenvT5_530Dir = Join-Path $env:USERPROFILE ".unsloth\studio\.venv_t5_530" +$VenvT5_550Dir = Join-Path $env:USERPROFILE ".unsloth\studio\.venv_t5_550" $VenvT5Legacy = Join-Path $env:USERPROFILE ".unsloth\studio\.venv_t5" -if (Test-Path $VenvT5Legacy) { Remove-Item -Recurse -Force $VenvT5Legacy } + +$_NeedT5Install = $false +if (Test-Path $VenvT5Legacy) { + Remove-Item -Recurse -Force $VenvT5Legacy + $_NeedT5Install = $true +} +if (-not (Test-Path $VenvT5_530Dir)) { $_NeedT5Install = $true } +if (-not (Test-Path $VenvT5_550Dir)) { $_NeedT5Install = $true } +# Also reinstall when python deps were updated +if (-not $SkipPythonDeps) { $_NeedT5Install = $true } + +if ($_NeedT5Install) { +Write-Host "" $prevEAP_t5 = $ErrorActionPreference $ErrorActionPreference = "Continue" # --- .venv_t5_530 (transformers 5.3.0) --- substep "pre-installing transformers 5.3.0 for newer model support..." -$VenvT5_530Dir = Join-Path $env:USERPROFILE ".unsloth\studio\.venv_t5_530" if (Test-Path $VenvT5_530Dir) { Remove-Item -Recurse -Force $VenvT5_530Dir } New-Item -ItemType Directory -Path $VenvT5_530Dir -Force | Out-Null foreach ($pkg in @("transformers==5.3.0", "huggingface_hub==1.8.0", "hf_xet==1.4.2")) { @@ -1629,7 +1643,6 @@ step "transformers" "5.3.0 pre-installed" # --- .venv_t5_550 (transformers 5.5.0) --- substep "pre-installing transformers 5.5.0 for Gemma 4 support..." -$VenvT5_550Dir = Join-Path $env:USERPROFILE ".unsloth\studio\.venv_t5_550" if (Test-Path $VenvT5_550Dir) { Remove-Item -Recurse -Force $VenvT5_550Dir } New-Item -ItemType Directory -Path $VenvT5_550Dir -Force | Out-Null foreach ($pkg in @("transformers==5.5.0", "huggingface_hub==1.8.0", "hf_xet==1.4.2")) { @@ -1662,11 +1675,7 @@ if ($tiktokenInstallExit -ne 0) { $ErrorActionPreference = $prevEAP_t5 step "transformers" "5.5.0 pre-installed" -} else { - step "python" "dependencies up to date" - # Restore ErrorActionPreference (was lowered for pip/python section) - $ErrorActionPreference = $prevEAP -} +} # end $_NeedT5Install # ========================================================================== # PHASE 3.4: Prefer prebuilt llama.cpp bundles before source build diff --git a/studio/setup.sh b/studio/setup.sh index 3040569f54..eb89e583ca 100755 --- a/studio/setup.sh +++ b/studio/setup.sh @@ -504,16 +504,31 @@ fi if [ "$_SKIP_PYTHON_DEPS" = false ]; then install_python_stack +else + step "python" "dependencies up to date" + verbose_substep "python deps check: installed=$_PKG_NAME@${INSTALLED_VER:-unknown} latest=${LATEST_VER:-unknown}" +fi - # ── 6b. Pre-install transformers 5.x into .venv_t5_530/ and .venv_t5_550/ ── - # Models like GLM-4.7-Flash, Qwen3 MoE need transformers>=5.3.0. - # Gemma 4 models need transformers>=5.5.0. - # Pre-install into separate directories to avoid runtime pip overhead. - # The training subprocess prepends the appropriate dir to sys.path. - - # Clean up legacy single .venv_t5 directory - [ -d "$STUDIO_HOME/.venv_t5" ] && rm -rf "$STUDIO_HOME/.venv_t5" +# ── 6b. Pre-install transformers 5.x into .venv_t5_530/ and .venv_t5_550/ ── +# Models like GLM-4.7-Flash, Qwen3 MoE need transformers>=5.3.0. +# Gemma 4 models need transformers>=5.5.0. +# Pre-install into separate directories to avoid runtime pip overhead. +# The training subprocess prepends the appropriate dir to sys.path. +# +# Runs outside the _SKIP_PYTHON_DEPS gate so that upgrades from legacy +# single .venv_t5 are always migrated to the tiered layout. +_NEED_T5_INSTALL=false +if [ -d "$STUDIO_HOME/.venv_t5" ]; then + # Legacy layout — migrate + rm -rf "$STUDIO_HOME/.venv_t5" + _NEED_T5_INSTALL=true +fi +[ ! -d "$VENV_T5_530_DIR" ] && _NEED_T5_INSTALL=true +[ ! -d "$VENV_T5_550_DIR" ] && _NEED_T5_INSTALL=true +# Also reinstall when python deps were updated (packages may need rebuild) +[ "$_SKIP_PYTHON_DEPS" = false ] && _NEED_T5_INSTALL=true +if [ "$_NEED_T5_INSTALL" = true ]; then [ -d "$VENV_T5_530_DIR" ] && rm -rf "$VENV_T5_530_DIR" mkdir -p "$VENV_T5_530_DIR" run_quiet "install transformers 5.3.0" fast_install --target "$VENV_T5_530_DIR" --no-deps "transformers==5.3.0" @@ -529,9 +544,6 @@ if [ "$_SKIP_PYTHON_DEPS" = false ]; then run_quiet "install hf_xet for t5_550" fast_install --target "$VENV_T5_550_DIR" --no-deps "hf_xet==1.4.2" run_quiet "install tiktoken for t5_550" fast_install --target "$VENV_T5_550_DIR" "tiktoken" step "transformers" "5.5.0 pre-installed" -else - step "python" "dependencies up to date" - verbose_substep "python deps check: installed=$_PKG_NAME@${INSTALLED_VER:-unknown} latest=${LATEST_VER:-unknown}" fi fi