diff --git a/studio/backend/core/training/training.py b/studio/backend/core/training/training.py
index 6dd42976c7..d69246f584 100644
--- a/studio/backend/core/training/training.py
+++ b/studio/backend/core/training/training.py
@@ -37,6 +37,42 @@
 logger = get_logger(__name__)
 
 
+def _coerce_seed(value, default = 3407) -> int:
+    """Return `value` as an int; fall back to `int(default)` when it is None or
+    cannot be converted (transformers.set_seed(None) raises)."""
+    try:
+        return int(value)
+    except (TypeError, ValueError, OverflowError):
+        # int(None) -> TypeError; int("x") / int(nan) -> ValueError; int(inf) -> OverflowError.
+        return int(default)
+
+
+def _coerce_optional_bool(value, default: bool) -> bool:
+    """Coerce to bool: None -> `default`; strings are read as true/false words."""
+    if value is None:
+        return bool(default)
+    if not isinstance(value, str):
+        return bool(value)
+    word = value.strip().lower()
+    if word in ("false", "0", "no", "off", ""):
+        return False
+    # Recognised truthy words and any other non-empty string both come out True.
+    return word in ("true", "1", "yes", "on") or bool(value)
+
+
+def _coerce_optional_nonneg_float(name: str, value):
+    """Return None or a float >= 0, else raise ValueError (raw `**kwargs` skip HTTP `ge=0`)."""
+    if value is None:
+        return None
+    try:
+        coerced = float(value)
+    except (TypeError, ValueError, OverflowError) as err:
+        raise ValueError(f"Unsloth: {name}={value!r} must be a non-negative float or None.") from err
+    if not coerced >= 0:  # `not >=` rather than `< 0` so NaN is rejected as well
+        raise ValueError(f"Unsloth: {name}={coerced} must be >= 0 (use 0 or None to disable).")
+    return coerced
+
+
 _HF_TMP_CHECKPOINT_RE = re.compile(r"^tmp-checkpoint-\d+$")
 
 
@@ -239,7 +275,17 @@ def start_training(self, job_id: str, **kwargs) -> bool:
             "save_steps": kwargs.get("save_steps", 0),
             "weight_decay": kwargs.get("weight_decay", 0.001),
             "max_grad_norm": kwargs.get("max_grad_norm", 0.0),
-            "random_seed": kwargs.get("random_seed", 3407),
+            "max_grad_value": _coerce_optional_nonneg_float(
+                "max_grad_value", kwargs.get("max_grad_value")
+            ),
+            "max_grad_leaf_norm": _coerce_optional_nonneg_float(
+                "max_grad_leaf_norm", kwargs.get("max_grad_leaf_norm")
+            ),
+            "cast_norm_output_to_input_dtype": _coerce_optional_bool(
+                kwargs.get("cast_norm_output_to_input_dtype"), True
+            ),
+            # MLX/CUDA/embedding workers need an int (transformers.set_seed(None) raises).
+            "random_seed": _coerce_seed(kwargs.get("random_seed")),
             "packing": kwargs.get("packing", False),
             "optim": kwargs.get("optim", "adamw_8bit"),
             "lr_scheduler_type": kwargs.get("lr_scheduler_type", "linear"),
diff --git a/studio/backend/core/training/worker.py b/studio/backend/core/training/worker.py
index 1120744a2d..696e9a07ff 100644
--- a/studio/backend/core/training/worker.py
+++ b/studio/backend/core/training/worker.py
@@ -1424,6 +1424,14 @@ def _poll_stop():
     is_dataset_image = bool(config.get("is_dataset_image", False))
     training_type = config.get("training_type", "LoRA/QLoRA")
     use_lora = training_type == "LoRA/QLoRA"
+    # Normalize seed; explicit None must not reach the seed chain.
+    _raw_seed = config.get("random_seed", 3407)
+    random_seed = 3407 if _raw_seed is None else int(_raw_seed)
+    # `config.get(k, d)` only fills d when key is missing; handle explicit None too.
+    _model_seed = config.get("model_random_state")
+    model_random_state = random_seed if _model_seed is None else int(_model_seed)
+    _lora_seed = config.get("lora_random_state")
+    lora_random_state = random_seed if _lora_seed is None else int(_lora_seed)
     model, tokenizer = FastMLXModel.from_pretrained(
         model_name,
         load_in_4bit = config.get("load_in_4bit", True),
@@ -1431,7 +1439,7 @@ def _poll_stop():
         text_only = None if is_dataset_image else True,
         token = hf_token,
         trust_remote_code = bool(config.get("trust_remote_code", False)),
-        random_state = config.get("random_seed", 3407),
+        random_state = model_random_state,
     )
 
     is_vlm = bool(is_dataset_image and getattr(model, "_is_vlm_model", False))
@@ -1473,7 +1481,7 @@ def _poll_stop():
             lora_dropout = config.get("lora_dropout", 0.0),
             use_rslora = config.get("use_rslora", False),
             init_lora_weights = config.get("init_lora_weights", True),
-            random_state = config.get("random_seed", 3407),
+            random_state = lora_random_state,
             target_modules = config.get("target_modules")
             or [
                 "q_proj",
@@ -1704,40 +1712,76 @@ def _fmt_progress(status_message = "", **_kw):
     else:
         eval_steps_val = int(eval_steps_val)
 
-    # MLX: per-element clip to [-1, 1]; norm clip disabled (its global reduction
-    # breaks MLX's eager pipeline). 1.0 not 5.0: |g_i| > 5 rarely fires, so the
-    # historical 5.0 was effectively a no-op.
+    # Per-element clipping only; trainer owns the None default. Re-validate
+    # for direct worker callers (training.py normalizes the main path).
     max_grad_norm = 0.0
-    max_grad_value = 1.0  # TODO: expose MLX grad-clip in Studio UI for power users
+    max_grad_value = config.get("max_grad_value")
+    if max_grad_value is not None:
+        max_grad_value = float(max_grad_value)
+        if max_grad_value < 0:
+            raise ValueError(
+                f"Unsloth MLX: max_grad_value={max_grad_value} must be >= 0 "
+                "(0 or None disables elementwise clipping)."
+            )
+    max_grad_leaf_norm = config.get("max_grad_leaf_norm")
+    if max_grad_leaf_norm is not None:
+        max_grad_leaf_norm = float(max_grad_leaf_norm)
+        if max_grad_leaf_norm < 0:
+            raise ValueError(
+                f"Unsloth MLX: max_grad_leaf_norm={max_grad_leaf_norm} must be >= 0 "
+                "(0 or None disables proportional leaf-norm clipping)."
+            )
+    weight_decay = config.get("weight_decay", 0.001)
+    weight_decay = 0.001 if weight_decay is None else float(weight_decay)
+
+    mlx_config_kwargs = dict(
+        per_device_train_batch_size = batch_size,
+        gradient_accumulation_steps = grad_accum,
+        max_steps = max_steps,
+        learning_rate = lr_value,
+        warmup_steps = warmup_steps,
+        lr_scheduler_type = lr_scheduler_type,
+        optim = optim_name,
+        weight_decay = weight_decay,
+        max_grad_norm = max_grad_norm,
+        max_grad_value = max_grad_value,
+        logging_steps = 1,
+        max_seq_length = max_seq_length,
+        seed = random_seed,
+        use_cce = True,
+        compile = True,
+        gradient_checkpointing = use_grad_checkpoint,
+        streaming = is_vlm,
+        packing = bool(config.get("packing", False)),
+        output_dir = output_dir,
+        save_steps = int(config.get("save_steps", 0) or 0),
+        eval_steps = eval_steps_val,
+    )
+
+    # Feature-detect optional fields so this PR works without the paired zoo bump.
+    _supported_fields = getattr(MLXTrainingConfig, "__dataclass_fields__", {})
+    if "cast_norm_output_to_input_dtype" in _supported_fields:
+        # Explicit None falls back to True (default).
+        _raw_cast = config.get("cast_norm_output_to_input_dtype", True)
+        mlx_config_kwargs["cast_norm_output_to_input_dtype"] = (
+            True if _raw_cast is None else bool(_raw_cast)
+        )
+    if "dataset_order" in _supported_fields:
+        mlx_config_kwargs["dataset_order"] = "torch_randperm"
+    if "max_grad_leaf_norm" in _supported_fields:
+        mlx_config_kwargs["max_grad_leaf_norm"] = max_grad_leaf_norm
+    if "append_eos" in _supported_fields:
+        raw_text_mode = training_type == "Continued Pretraining" or format_type == "raw"
+        # Studio SFT formatting owns rendered examples; raw/CPT text still
+        # needs MLX to append EOS like the CUDA raw-text path.
+        mlx_config_kwargs["append_eos"] = bool(raw_text_mode)
 
     trainer = MLXTrainer(
         model = model,
         tokenizer = tokenizer,
         train_dataset = dataset,
         eval_dataset = eval_dataset,
-        args = MLXTrainingConfig(
-            per_device_train_batch_size = batch_size,
-            gradient_accumulation_steps = grad_accum,
-            max_steps = max_steps,
-            learning_rate = lr_value,
-            warmup_steps = warmup_steps,
-            lr_scheduler_type = lr_scheduler_type,
-            optim = optim_name,
-            weight_decay = float(config.get("weight_decay", 0.001) or 0.001),
-            max_grad_norm = max_grad_norm,
-            max_grad_value = max_grad_value,
-            logging_steps = 1,
-            max_seq_length = max_seq_length,
-            seed = config.get("random_seed", 3407),
-            use_cce = True,
-            compile = True,
-            gradient_checkpointing = use_grad_checkpoint,
-            streaming = is_vlm,
-            packing = bool(config.get("packing", False)),
-            output_dir = output_dir,
-            save_steps = int(config.get("save_steps", 0) or 0),
-            eval_steps = eval_steps_val,
-        ),
+        args = MLXTrainingConfig(**mlx_config_kwargs),
     )
     _trainer_ref[0] = trainer
     if _stop_requested[0]:
diff --git a/studio/backend/models/training.py b/studio/backend/models/training.py
index ca04591178..6cf8907b84 100644
--- a/studio/backend/models/training.py
+++ b/studio/backend/models/training.py
@@ -325,7 +325,37 @@ def _check_lora_dropout(cls, v: float) -> float:
         ge = 0,
         description = "Global gradient norm clipping threshold. Set 0 to disable.",
     )
-    random_seed: int = Field(42, description = "Random seed")
+    max_grad_value: Optional[float] = Field(
+        None,
+        ge = 0,
+        description = (
+            "MLX-only elementwise gradient value clipping threshold. "
+            "If unset, MLX uses its runtime default."
+        ),
+    )
+    max_grad_leaf_norm: Optional[float] = Field(
+        None,
+        ge = 0,
+        description = (
+            "MLX-only proportional per-parameter gradient norm cap. "
+            "Preserves each tensor's gradient direction without global norm "
+            "clipping's memory overhead."
+        ),
+    )
+    cast_norm_output_to_input_dtype: bool = Field(
+        True,
+        description = (
+            "MLX-only: keep norm parameters in fp32 but cast norm outputs "
+            "back to the incoming activation dtype."
+        ),
+    )
+    random_seed: int = Field(
+        3407,
+        description = (
+            "Random seed; matches the Studio backend / MLX worker default "
+            "and unsloth's historical recommended value."
+        ),
+    )
     packing: bool = Field(False, description = "Enable sequence packing")
     optim: str = Field("adamw_8bit", description = "Optimizer")
     lr_scheduler_type: str = Field("linear", description = "Learning rate scheduler type")
diff --git a/studio/backend/routes/training.py b/studio/backend/routes/training.py
index 281f03bcaf..09a3e06c91 100644
--- a/studio/backend/routes/training.py
+++ b/studio/backend/routes/training.py
@@ -215,6 +215,9 @@ async def start_training(
             "save_steps": request.save_steps,
             "weight_decay": request.weight_decay,
             "max_grad_norm": request.max_grad_norm,
+            "max_grad_value": request.max_grad_value,
+            "max_grad_leaf_norm": request.max_grad_leaf_norm,
+            "cast_norm_output_to_input_dtype": request.cast_norm_output_to_input_dtype,
             "random_seed": request.random_seed,
             "packing": request.packing,
             "optim": request.optim,
diff --git a/studio/backend/tests/test_mlx_training_worker_config.py b/studio/backend/tests/test_mlx_training_worker_config.py
index 4402031467..44ef9045b4 100644
--- a/studio/backend/tests/test_mlx_training_worker_config.py
+++ b/studio/backend/tests/test_mlx_training_worker_config.py
@@ -85,6 +85,13 @@ def test_mlx_studio_rejects_unknown_scheduler():
         _normalize_mlx_studio_scheduler("linear_typo")
 
 
+def test_mlx_studio_keeps_hf_style_tokenizer_dual_purpose():
+    source = (Path(__file__).resolve().parents[1] / "core" / "training" / "worker.py").read_text()
+    # Text-level guard: only checks which literal snippets worker.py contains; nothing is run.
+    assert "tokenizer = tokenizer" in source
+    assert "processor = tokenizer if is_vlm else None" not in source
+
+
 def test_mlx_vlm_resize_uses_max_dimension_like_torch_trainer():
     assert _mlx_vlm_max_resized_size(1000, 500, 512) == (512, 256)
     assert _mlx_vlm_max_resized_size(500, 1000, 512) == (256, 512)
diff --git a/studio/backend/tests/test_training_raw_support.py b/studio/backend/tests/test_training_raw_support.py
index 2b1299de5c..fb3cffc91e 100644
--- a/studio/backend/tests/test_training_raw_support.py
+++ b/studio/backend/tests/test_training_raw_support.py
@@ -107,10 +107,191 @@ def start(self):
                 model_name = "unsloth/test",
                 training_type = "LoRA/QLoRA",
                 max_grad_norm = 0.7,
+                max_grad_value = 3.0,
+                max_grad_leaf_norm = 1.3,
             )
 
         config = mock_process.call_args.kwargs["kwargs"]["config"]
         self.assertEqual(config["max_grad_norm"], 0.7)
+        self.assertEqual(config["max_grad_value"], 3.0)
+        self.assertEqual(config["max_grad_leaf_norm"], 1.3)
+
+    def test_training_backend_forwards_random_seed_without_internal_mlx_seed_keys(self):
+        backend = TrainingBackend()
+        # Inert stand-ins: their start() does nothing, so no real process or thread is launched.
+        class DummyProcess:
+            pid = 12345
+
+            def start(self):
+                return None
+
+        class DummyThread:
+            def start(self):
+                return None
+
+        dummy_queue = object()
+
+        with (
+            patch(
+                "core.training.training.prepare_gpu_selection",
+                return_value = ([0], {"selection_mode": "auto"}),
+            ),
+            patch(
+                "core.training.training._CTX.Queue",
+                side_effect = [dummy_queue, dummy_queue],
+            ),
+            patch(
+                "core.training.training._CTX.Process", return_value = DummyProcess()
+            ) as mock_process,
+            patch(
+                "core.training.training.threading.Thread",
+                return_value = DummyThread(),
+            ),
+        ):
+            backend.start_training(
+                job_id = "test-seed",
+                model_name = "unsloth/test",
+                training_type = "LoRA/QLoRA",
+                random_seed = 1234,
+            )
+        # Inspect the config handed to the patched Process: random_seed only, no MLX seed keys.
+        config = mock_process.call_args.kwargs["kwargs"]["config"]
+        self.assertEqual(config["random_seed"], 1234)
+        self.assertNotIn("model_random_state", config)
+        self.assertNotIn("lora_random_state", config)
+
+    def test_route_forwards_all_grad_clipping_fields(self):
+        # routes/training.py assembles the config dict by hand, so any schema
+        # field it does not copy over is silently dropped for REST callers.
+        route_text = (_BACKEND_ROOT / "routes" / "training.py").read_text()
+        clip_fields = ("max_grad_norm", "max_grad_value", "max_grad_leaf_norm")
+        for field in clip_fields:
+            self.assertIn(f'"{field}": request.{field}', route_text)
+
+    def test_mlx_worker_falls_back_init_seeds_to_random_seed(self):
+        source = (_BACKEND_ROOT / "core" / "training" / "worker.py").read_text()
+        # Text-level check: every assertion below matches literal worker.py source lines.
+        # random_seed itself is normalized first so explicit None coming
+        # from a raw / backend caller does not propagate through the chain.
+        self.assertIn('_raw_seed = config.get("random_seed", 3407)', source)
+        self.assertIn(
+            "random_seed = 3407 if _raw_seed is None else int(_raw_seed)",
+            source,
+        )
+        # Both absent and explicit None must fall back to random_seed.
+        # `dict.get(key, default)` only fills the default on absent keys,
+        # so an explicit `None` would otherwise reach FastMLXModel /
+        # get_peft_model and disable deterministic init.
+        self.assertIn('_model_seed = config.get("model_random_state")', source)
+        self.assertIn(
+            "model_random_state = random_seed if _model_seed is None else int(_model_seed)",
+            source,
+        )
+        self.assertIn('_lora_seed = config.get("lora_random_state")', source)
+        self.assertIn(
+            "lora_random_state = random_seed if _lora_seed is None else int(_lora_seed)",
+            source,
+        )
+        self.assertIn("random_state = model_random_state", source)
+        self.assertIn("random_state = lora_random_state", source)
+        # MLXTrainingConfig now receives the normalized seed directly.
+        self.assertIn("seed = random_seed,", source)
+
+    def test_mlx_worker_preserves_null_max_grad_value_for_trainer_default(self):
+        source = (_BACKEND_ROOT / "core" / "training" / "worker.py").read_text()
+        # Text-level check on worker.py; the worker is not imported or run here.
+        # None must survive to the MLX trainer so it picks its own runtime
+        # default, and any other value must coerce to float without
+        # rebinding None to 1.0 (which the legacy code did).
+        self.assertIn('max_grad_value = config.get("max_grad_value")', source)
+        self.assertIn("max_grad_value = float(max_grad_value)", source)
+        self.assertNotIn(
+            "max_grad_value = 1.0 if max_grad_value is None else float(max_grad_value)",
+            source,
+        )
+
+    def test_training_backend_normalizes_explicit_none_seed_and_dtypes(self):
+        # Raw / backend callers may pass `random_seed=None`,
+        # `cast_norm_output_to_input_dtype=None`, or the MLX clip knobs as
+        # None (or omit them). The seed and cast flag must be replaced by
+        # their defaults inside `TrainingBackend.start_training`; otherwise
+        # transformers.set_seed(None) raises, PEFT init becomes
+        # nondeterministic, and the MLX norm-output cast silently flips.
+        from core.training.training import (
+            _coerce_seed,
+            _coerce_optional_bool,
+            _coerce_optional_nonneg_float,
+        )
+
+        self.assertEqual(_coerce_seed(None), 3407)
+        self.assertEqual(_coerce_seed("123"), 123)
+        self.assertEqual(_coerce_seed("not-a-number"), 3407)
+
+        self.assertTrue(_coerce_optional_bool(None, True))
+        self.assertFalse(_coerce_optional_bool(None, False))
+        self.assertFalse(_coerce_optional_bool("false", True))
+        self.assertTrue(_coerce_optional_bool("true", False))
+
+        self.assertIsNone(_coerce_optional_nonneg_float("max_grad_value", None))
+        self.assertEqual(_coerce_optional_nonneg_float("max_grad_value", "2.5"), 2.5)
+        self.assertEqual(_coerce_optional_nonneg_float("max_grad_value", 0), 0.0)
+        with self.assertRaises(ValueError):
+            _coerce_optional_nonneg_float("max_grad_value", -1)
+        self.assertIsNone(_coerce_optional_nonneg_float("max_grad_leaf_norm", None))
+        self.assertEqual(
+            _coerce_optional_nonneg_float("max_grad_leaf_norm", "1.3"),
+            1.3,
+        )
+        with self.assertRaises(ValueError):
+            _coerce_optional_nonneg_float("max_grad_leaf_norm", -1)
+
+    def test_mlx_worker_feature_detects_optional_mlx_config_fields(self):
+        # `cast_norm_output_to_input_dtype`, `dataset_order`,
+        # `max_grad_leaf_norm`, and `append_eos` ship in the paired
+        # unsloth-zoo update. Until that floor is in place, the
+        # worker must gate them so releases that predate those fields can
+        # still construct MLXTrainingConfig without TypeError.
+        source = (_BACKEND_ROOT / "core" / "training" / "worker.py").read_text()
+        # Everything below inspects worker.py as text; MLXTrainingConfig is never constructed.
+        self.assertIn(
+            'getattr(MLXTrainingConfig, "__dataclass_fields__", {})',
+            source,
+        )
+        self.assertIn('if "cast_norm_output_to_input_dtype" in _supported_fields:', source)
+        self.assertIn('if "dataset_order" in _supported_fields:', source)
+        self.assertIn('if "max_grad_leaf_norm" in _supported_fields:', source)
+        self.assertIn(
+            'mlx_config_kwargs["max_grad_leaf_norm"] = max_grad_leaf_norm',
+            source,
+        )
+        self.assertIn('if "append_eos" in _supported_fields:', source)
+        self.assertIn('format_type == "raw"', source)
+        self.assertIn('mlx_config_kwargs["append_eos"] = bool(raw_text_mode)', source)
+        # The unconditional kwargs must NOT include any gated field.
+        # Use proper paren tracking; `source.find(")", ...)` would stop at
+        # the first close paren inside the dict body (e.g.
+        # `int(config.get("save_steps", 0) or 0)`) and miss any future
+        # unconditional addition of the gated fields later in the dict.
+        unconditional_block_start = source.find("mlx_config_kwargs = dict(")
+        self.assertNotEqual(unconditional_block_start, -1)
+        depth = 0
+        i = unconditional_block_start + len("mlx_config_kwargs = dict")
+        end = i
+        while i < len(source):
+            ch = source[i]
+            if ch == "(":
+                depth += 1
+            elif ch == ")":
+                depth -= 1
+                if depth == 0:
+                    end = i + 1
+                    break
+            i += 1
+        unconditional = source[unconditional_block_start:end]
+        self.assertNotIn("cast_norm_output_to_input_dtype", unconditional)
+        self.assertNotIn("dataset_order", unconditional)
+        self.assertNotIn("max_grad_leaf_norm", unconditional)
+        self.assertNotIn("append_eos", unconditional)
 
     def test_training_route_forwards_embedding_learning_rate(self):
         training_route = _load_route_module(
diff --git a/studio/frontend/src/features/training/api/mappers.ts b/studio/frontend/src/features/training/api/mappers.ts
index ead719f825..773c5581ae 100644
--- a/studio/frontend/src/features/training/api/mappers.ts
+++ b/studio/frontend/src/features/training/api/mappers.ts
@@ -111,6 +111,7 @@ export function buildTrainingStartPayload(
     eval_steps: config.evalSteps,
     weight_decay: config.weightDecay,
     max_grad_norm: 0.0,
+    max_grad_value: null,
     random_seed: config.randomSeed,
     packing: isEmbedding ? false : config.packing,
     optim: config.optimizerType,
diff --git a/studio/frontend/src/features/training/types/api.ts b/studio/frontend/src/features/training/types/api.ts
index 8490d5ee6f..b466276bc2 100644
--- a/studio/frontend/src/features/training/types/api.ts
+++ b/studio/frontend/src/features/training/types/api.ts
@@ -37,6 +37,7 @@ export interface TrainingStartRequest {
   eval_steps: number;
   weight_decay: number;
   max_grad_norm: number;
+  max_grad_value?: number | null;
   random_seed: number;
   packing: boolean;
   optim: string;
diff --git a/tests/python/test_vision_lora_targeting.py b/tests/python/test_vision_lora_targeting.py
new file mode 100644
index 0000000000..0a27569efd
--- /dev/null
+++ b/tests/python/test_vision_lora_targeting.py
@@ -0,0 +1,43 @@
+from pathlib import Path
+import re
+
+import torch
+
+
+def test_vlm_lora_regex_respects_language_only_with_explicit_targets():
+    from unsloth_zoo.peft_utils import get_peft_regex
+    # Stub model: one q_proj Linear under language_model.* and one under vision_tower.*
+    class FakeVLM(torch.nn.Module):
+        def __init__(self):
+            super().__init__()
+            self.language_model = torch.nn.Module()
+            self.language_model.layers = torch.nn.ModuleList([torch.nn.Module()])
+            self.language_model.layers[0].self_attn = torch.nn.Module()
+            self.language_model.layers[0].self_attn.q_proj = torch.nn.Linear(4, 4)
+            self.vision_tower = torch.nn.Module()
+            self.vision_tower.vision_model = torch.nn.Module()
+            self.vision_tower.vision_model.encoder = torch.nn.Module()
+            self.vision_tower.vision_model.encoder.layers = torch.nn.ModuleList([torch.nn.Module()])
+            self.vision_tower.vision_model.encoder.layers[0].self_attn = torch.nn.Module()
+            self.vision_tower.vision_model.encoder.layers[0].self_attn.q_proj = torch.nn.Linear(
+                4, 4
+            )
+    # Vision layers off plus an explicit target list: the regex must match the language q_proj only.
+    regex = get_peft_regex(
+        FakeVLM(),
+        finetune_vision_layers = False,
+        finetune_language_layers = True,
+        finetune_attention_modules = True,
+        finetune_mlp_modules = True,
+        target_modules = ["q_proj"],
+    )
+
+    assert re.search(regex, "language_model.layers.0.self_attn.q_proj")
+    assert not re.search(regex, "vision_tower.vision_model.encoder.layers.0.self_attn.q_proj")
+
+
+def test_fast_vision_model_wraps_explicit_targets_when_layer_filters_are_used():
+    # Anchor on this file (tests/python/...) so the test does not depend on the CWD.
+    source = (Path(__file__).resolve().parents[2] / "unsloth" / "models" / "vision.py").read_text()
+    assert "target_modules = get_peft_regex(" in source
+    assert "target_modules = list(target_modules)" in source
diff --git a/tests/studio/run_real_mlx_smoke.py b/tests/studio/run_real_mlx_smoke.py
index 6bf0288206..e58b2098d8 100644
--- a/tests/studio/run_real_mlx_smoke.py
+++ b/tests/studio/run_real_mlx_smoke.py
@@ -11,9 +11,10 @@
     python run_real_mlx_smoke.py reload --format {lora|merged|gguf} --dir D
 
 `train` loads gemma-3-270m-it, applies LoRA, probes pre/post loss+grad,
-overfits one repeated row, generates, saves in lora/merged_16bit/gguf
-(gguf best-effort), and writes train_metrics.json. `reload` reopens each
-saved format in a fresh process and writes <format>_reload_metrics.json.
+overfits one repeated row for 30 deterministic steps (batch 2, accum 3),
+generates, saves in lora/merged_16bit/gguf (gguf best-effort), and writes
+train_metrics.json. `reload` reopens each saved format in a fresh process
+and writes <format>_reload_metrics.json.
 
 GGUF export and LoRA reload fixes land in unslothai/unsloth-zoo#627.
 
@@ -120,10 +121,9 @@ def _compute_loss_and_grad_norm(model, tokenizer, text: str) -> tuple[float, flo
     import mlx.nn as nn
     from mlx.utils import tree_flatten
 
+    # Match Studio's text dataset path: Studio passes exactly the formatted
+    # text to the tokenizer and does not append EOS behind the user's back.
     ids = list(tokenizer.encode(text))
-    eos_id = getattr(tokenizer, "eos_token_id", None)
-    if eos_id is not None:
-        ids.append(int(eos_id))
     if len(ids) < 2:
         raise RuntimeError(f"text too short to compute loss: {len(ids)} tokens")
 
@@ -268,10 +268,9 @@ def cmd_train(args) -> int:
             lr_scheduler_type = "constant",
             optim = "adamw",
             weight_decay = 0.0,
-            # Elementwise value clip is cheaper than norm clip on MLX (no
-            # cross-tree reduction) and has a higher 13-seed pass rate at this
-            # fixture (value=1.0 62%, norm=1.0 46%). Pin both: value wins when
-            # both > 0, so disable norm.
+            # Pin the elementwise clip to match the 13-seed-tested fixture
+            # (value=1.0 62% pass, norm=1.0 46%). Zoo's new MLX default is
+            # max_grad_leaf_norm=1.0; explicit value wins, norm disabled.
             max_grad_norm = 0.0,
             max_grad_value = 1.0,
             logging_steps = 1,
@@ -329,9 +328,14 @@ def _on_step(
     }
     # logging_steps=1 + max_steps=N -> N callbacks; track config so the
     # gate auto-follows if max_steps is bumped again.
+    expected_logged_steps = int(config.max_steps)
     assert (
-        len(losses_per_step) == config.max_steps
-    ), f"expected {config.max_steps} logged steps, got {losses_per_step}"
+        len(losses_per_step) == expected_logged_steps
+    ), f"expected {expected_logged_steps} logged steps, got {losses_per_step}"
+    if "train_steps" in train_result:
+        assert int(train_result["train_steps"]) == expected_logged_steps, (
+            f"expected train_steps={expected_logged_steps}, got " f"{train_result['train_steps']}"
+        )
     for i, l in enumerate(losses_per_step):
         # Allow exact 0.0: fp16 per-step loss underflows to 0.0 after
         # the LoRA reaches loss=0 around step ~10 with this fixture +
diff --git a/unsloth/models/vision.py b/unsloth/models/vision.py
index e8161427d5..66f9cf3d1b 100644
--- a/unsloth/models/vision.py
+++ b/unsloth/models/vision.py
@@ -1399,6 +1399,25 @@ def get_peft_model(
             )
         else:
             assert type(target_modules) in (list, tuple, str)
+            if type(target_modules) in (list, tuple) and (
+                not finetune_vision_layers
+                or not finetune_language_layers
+                or not finetune_attention_modules
+                or not finetune_mlp_modules
+            ):
+                print(
+                    "Unsloth: Explicit target_modules are constrained by the "
+                    "finetune_(vision|language|attention|mlp) filters; adapters "
+                    "attach only where both select."
+                )
+                target_modules = get_peft_regex(
+                    model,
+                    finetune_vision_layers = finetune_vision_layers,
+                    finetune_language_layers = finetune_language_layers,
+                    finetune_attention_modules = finetune_attention_modules,
+                    finetune_mlp_modules = finetune_mlp_modules,
+                    target_modules = list(target_modules),
+                )
 
         if hasattr(model, "vllm_engine"):
             if (