From c39f56fce039742693814b7770bde020399251a3 Mon Sep 17 00:00:00 2001
From: Daniel Han <danielhanchen@gmail.com>
Date: Sat, 9 Aug 2025 14:45:43 -0700
Subject: [PATCH 001/154] Fix mamba

---
 unsloth/models/loader.py | 2 ++
 unsloth/models/vision.py | 1 +
 2 files changed, 3 insertions(+)

diff --git a/unsloth/models/loader.py b/unsloth/models/loader.py
index ea746be43d..75561c4775 100644
--- a/unsloth/models/loader.py
+++ b/unsloth/models/loader.py
@@ -587,6 +587,8 @@ def from_pretrained(
             if transformers_version < Version("4.53.0"):
                 raise RuntimeError("Unsloth: Gemma 3N only works on transformers >= 4.53.0" + LATEST)
         elif "falcon-h1" in lowered_model_name:
+            # Falcon must use float32 Triton ie TRITON_F32_DEFAULT = 'ieee'
+            # since Mamba kernels error out on using lower precision
             os.environ["UNSLOTH_FORCE_CUSTOM_DTYPE"] = \
                 "float16;torch.float32;torch.float16;"\
                 "if name.endswith(('q_proj', 'k_proj', 'v_proj', 'o_proj', 'gate_proj', 'up_proj', 'down_proj', 'head')): module.to(torch.float16); "\
diff --git a/unsloth/models/vision.py b/unsloth/models/vision.py
index 5524d8f16d..bdf86196d4 100644
--- a/unsloth/models/vision.py
+++ b/unsloth/models/vision.py
@@ -373,6 +373,7 @@ def from_pretrained(
                 custom_datatype = _custom_datatype
                 # Execute code as well
                 if len(execute_code.strip()) != 0:
+                    print(execute_code)
                     exec(execute_code)
             else:
                 custom_datatype = None

From 4bd35c509f26c4ff3409090175bba7fab4a604a9 Mon Sep 17 00:00:00 2001
From: Daniel Han <danielhanchen@gmail.com>
Date: Sat, 9 Aug 2025 14:50:53 -0700
Subject: [PATCH 002/154] Update loader.py

---
 unsloth/models/loader.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/unsloth/models/loader.py b/unsloth/models/loader.py
index 75561c4775..186d302d44 100644
--- a/unsloth/models/loader.py
+++ b/unsloth/models/loader.py
@@ -591,8 +591,8 @@ def from_pretrained(
             # since Mamba kernels error out on using lower precision
             os.environ["UNSLOTH_FORCE_CUSTOM_DTYPE"] = \
                 "float16;torch.float32;torch.float16;"\
-                "if name.endswith(('q_proj', 'k_proj', 'v_proj', 'o_proj', 'gate_proj', 'up_proj', 'down_proj', 'head')): module.to(torch.float16); "\
-                "os.environ['TRITON_F32_DEFAULT'] = 'ieee';"
+                "if name.endswith(('q_proj', 'k_proj', 'v_proj', 'o_proj', 'gate_proj', 'up_proj', 'down_proj', 'head')): "\
+                "module, os.environ['TRITON_F32_DEFAULT'] = module.to(torch.float16), 'ieee'"
         elif "gpt-oss" in lowered_model_name:
             os.environ["UNSLOTH_DISABLE_STATIC_GENERATION"] = "1"
             # CCE fails on Tesla T4

From 1f0a4c32aac3ca721fb50cad39a8dbbf28e4fc1b Mon Sep 17 00:00:00 2001
From: Daniel Han <danielhanchen@gmail.com>
Date: Sat, 9 Aug 2025 14:51:04 -0700
Subject: [PATCH 003/154] Update vision.py

---
 unsloth/models/vision.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/unsloth/models/vision.py b/unsloth/models/vision.py
index bdf86196d4..5524d8f16d 100644
--- a/unsloth/models/vision.py
+++ b/unsloth/models/vision.py
@@ -373,7 +373,6 @@ def from_pretrained(
                 custom_datatype = _custom_datatype
                 # Execute code as well
                 if len(execute_code.strip()) != 0:
-                    print(execute_code)
                     exec(execute_code)
             else:
                 custom_datatype = None

From 3cb97197d56f31c040c8bc17f68bb682aacb1928 Mon Sep 17 00:00:00 2001
From: Daniel Han <danielhanchen@gmail.com>
Date: Sat, 9 Aug 2025 14:54:35 -0700
Subject: [PATCH 004/154] Update loader.py

---
 unsloth/models/loader.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/unsloth/models/loader.py b/unsloth/models/loader.py
index 186d302d44..b8f2432fc0 100644
--- a/unsloth/models/loader.py
+++ b/unsloth/models/loader.py
@@ -591,8 +591,8 @@ def from_pretrained(
             # since Mamba kernels error out on using lower precision
             os.environ["UNSLOTH_FORCE_CUSTOM_DTYPE"] = \
                 "float16;torch.float32;torch.float16;"\
-                "if name.endswith(('q_proj', 'k_proj', 'v_proj', 'o_proj', 'gate_proj', 'up_proj', 'down_proj', 'head')): "\
-                "module, os.environ['TRITON_F32_DEFAULT'] = module.to(torch.float16), 'ieee'"
+                "if name.endswith(('q_proj', 'k_proj', 'v_proj', 'o_proj', 'gate_proj', 'up_proj', 'down_proj', 'head')): module.to(torch.float16);"\
+                "os.environ['TRITON_F32_DEFAULT'] = 'ieee'"
         elif "gpt-oss" in lowered_model_name:
             os.environ["UNSLOTH_DISABLE_STATIC_GENERATION"] = "1"
             # CCE fails on Tesla T4

From 1432eac9d0b82ab732e4e4f1f9fbb0fbbb4c63df Mon Sep 17 00:00:00 2001
From: Datta Nimmaturi <venkatadattasainimmaturi@gmail.com>
Date: Wed, 13 Aug 2025 08:16:43 +0530
Subject: [PATCH 005/154] Filter vLLM standby logs (#3131)

* filter vLLM standby logs

* safeguard standby logger patch

* Update unsloth/models/_utils.py

* Update unsloth/models/_utils.py

* Update unsloth/models/_utils.py

---------

Co-authored-by: Daniel Han <danielhanchen@gmail.com>
---
 unsloth/models/_utils.py | 34 ++++++++++++++++++++++++++++++++++
 1 file changed, 34 insertions(+)

diff --git a/unsloth/models/_utils.py b/unsloth/models/_utils.py
index 4426a28266..d904d8674a 100644
--- a/unsloth/models/_utils.py
+++ b/unsloth/models/_utils.py
@@ -152,6 +152,40 @@ def __init__(self, text): self.text = text
     def filter(self, x): return not (self.text in x.getMessage())
 pass
 
+if os.environ.get('UNSLOTH_ENABLE_LOGGING', '0') != '1':
+    try:
+        from vllm.worker.worker import logger as vllm_worker_logger
+        vllm_worker_logger.addFilter(HideLoggingMessage("Sleep mode freed"))
+        del vllm_worker_logger
+    except:
+        pass
+    try:
+        from vllm.v1.worker.gpu_worker import logger as vllm_gpu_worker_logger
+        vllm_gpu_worker_logger.addFilter(HideLoggingMessage("Sleep mode freed"))
+        del vllm_gpu_worker_logger
+    except:
+        pass
+    try:
+        from vllm.executor.executor_base import logger as vllm_executor_logger
+        vllm_executor_logger.addFilter(HideLoggingMessage("to fall asleep"))
+        vllm_executor_logger.addFilter(HideLoggingMessage("to wake up"))
+        del vllm_executor_logger
+    except:
+        pass
+    try:
+        from vllm.core.block.prefix_caching_block import logger as vllm_prefix_caching_logger
+        vllm_prefix_caching_logger.addFilter(HideLoggingMessage("reset prefix cache"))
+        del vllm_prefix_caching_logger
+    except:
+        pass
+    try:
+        from vllm.v1.core.block_pool import logger as vllm_block_pool_logger
+        vllm_block_pool_logger.addFilter(HideLoggingMessage("reset prefix cache"))
+        del vllm_block_pool_logger
+    except:
+        pass
+pass
+
 # The speedups for torchdynamo mostly come with GPU Ampere or higher and which is not detected here.
 from transformers.training_args import logger as transformers_training_args_logger
 transformers_training_args_logger.addFilter(HideLoggingMessage("The speedups"))

From fd1124ab64c96af40dbdf8294a9e2bdaa55e01cf Mon Sep 17 00:00:00 2001
From: Daniel Han <danielhanchen@gmail.com>
Date: Tue, 12 Aug 2025 21:26:39 -0700
Subject: [PATCH 006/154] Update loader.py

---
 unsloth/models/loader.py | 15 +++++++++++++++
 1 file changed, 15 insertions(+)

diff --git a/unsloth/models/loader.py b/unsloth/models/loader.py
index b8f2432fc0..15f3e43aef 100644
--- a/unsloth/models/loader.py
+++ b/unsloth/models/loader.py
@@ -111,6 +111,14 @@ def from_pretrained(
         disable_log_stats          = True,
         *args, **kwargs,
     ):
+        # Login to allow private models
+        if token is None: token = get_token()
+        if token is not None:
+            try:
+                from huggingface_hub import login
+                login(token = token)
+            except:
+                pass
         if load_in_8bit or full_finetuning:
             return FastModel.from_pretrained(
                 model_name                 = model_name,
@@ -513,6 +521,13 @@ def from_pretrained(
         *args, **kwargs,
     ):
         if token is None: token = get_token()
+        # Login to allow private models
+        if token is not None:
+            try:
+                from huggingface_hub import login
+                login(token = token)
+            except:
+                pass
         if whisper_language is not None: assert(type(whisper_language) is str)
         if whisper_task is not None: assert(type(whisper_task) is str)
         SUPPORTS_BFLOAT16 = is_bfloat16_supported()

From b78189b2d5a127b43a10f5aed1359a1cfe3629c5 Mon Sep 17 00:00:00 2001
From: Daniel Han <danielhanchen@gmail.com>
Date: Wed, 13 Aug 2025 03:27:54 -0700
Subject: [PATCH 007/154] Add scaler

---
 unsloth/models/_utils.py | 12 ++++++++++++
 unsloth/models/rl.py     | 14 ++++++++++++++
 2 files changed, 26 insertions(+)

diff --git a/unsloth/models/_utils.py b/unsloth/models/_utils.py
index d904d8674a..3bd3c2c294 100644
--- a/unsloth/models/_utils.py
+++ b/unsloth/models/_utils.py
@@ -152,6 +152,7 @@ def __init__(self, text): self.text = text
     def filter(self, x): return not (self.text in x.getMessage())
 pass
 
+# Stop vLLM messages
 if os.environ.get('UNSLOTH_ENABLE_LOGGING', '0') != '1':
     try:
         from vllm.worker.worker import logger as vllm_worker_logger
@@ -258,6 +259,17 @@ def filter(self, x): return not (self.text in x.getMessage())
 except:
     pass
 
+# You passed `quantization_config` or equivalent parameters
+try:
+    warnings.filterwarnings(
+        action = "ignore",
+        message = r".*quantization_config.*",
+        category = UserWarning,
+        append = True,
+    )
+except:
+    pass
+
 # Errors out on
 # Some weights of Gemma3nForConditionalGeneration were not initialized from the model checkpoint
 from transformers.modeling_utils import logger as transformers_logger
diff --git a/unsloth/models/rl.py b/unsloth/models/rl.py
index deb779588c..e751ef5e30 100644
--- a/unsloth/models/rl.py
+++ b/unsloth/models/rl.py
@@ -421,6 +421,20 @@ def _patch_trl_rl_trainers(trainer_file = "grpo_trainer"):
         RLTrainer_post += neftune_check
     pass
 
+    # Add accelerator scaler to model
+    if "model" in call_args:
+        neftune_check = \
+        "if hasattr(self, 'accelerator'):\n"\
+        "    scaler = self.accelerator.scaler\n"\
+        "    current_model = model\n"\
+        "    while hasattr(current_model, 'model'):\n"\
+        "        current_model.accelerator_scaler = scaler\n"\
+        "        current_model = current_model.model\n"\
+        "    current_model.accelerator_scaler = scaler\n"\
+        "pass\n"
+        RLTrainer_post += neftune_check
+    pass
+
     # Edit optional metrics
     other_metrics_processor = ""
     if trainer_file in RL_METRICS_CHANGES:

From cd2e284c97bb60618da78fcf1314f3a3a5885dd8 Mon Sep 17 00:00:00 2001
From: Daniel Han <danielhanchen@gmail.com>
Date: Wed, 13 Aug 2025 05:12:35 -0700
Subject: [PATCH 008/154] Update llama.py

---
 unsloth/models/llama.py | 25 +++++++++++++++++++------
 1 file changed, 19 insertions(+), 6 deletions(-)

diff --git a/unsloth/models/llama.py b/unsloth/models/llama.py
index 3c0d5012ae..eafbd5a433 100644
--- a/unsloth/models/llama.py
+++ b/unsloth/models/llama.py
@@ -1197,12 +1197,25 @@ def _CausalLM_fast_forward(
                 if self.config.model_type == "falcon_h1":
                     hidden_states = hidden_states * self.config.lm_head_multiplier
 
-                loss = fused_linear_cross_entropy(
-                    hidden_states      = hidden_states,
-                    lm_weight          = lm_head,
-                    labels             = labels,
-                    num_items_in_batch = n_items,
-                    logit_softcapping  = logit_softcapping,
+                # loss = fused_linear_cross_entropy(
+                #     hidden_states      = hidden_states,
+                #     lm_weight          = lm_head,
+                #     labels             = labels,
+                #     num_items_in_batch = n_items,
+                #     logit_softcapping  = logit_softcapping,
+                # )
+                loss = unsloth_fused_ce_loss(
+                    trainer              = None,
+                    hidden_states        = hidden_states,
+                    lm_head_weight       = lm_head,
+                    lm_head_bias         = None,
+                    labels               = labels,
+                    mask                 = None,
+                    n_items              = n_items,
+                    scaling              = getattr(self, "accelerator_scaler", None),
+                    target_gb            = 1,
+                    torch_compile        = True,
+                    logit_softcapping    = logit_softcapping,
                 )
                 if not return_dict:
                     output = (logits,) + outputs[1:]

From 5e976a5881296f35c6affae56178d3a2abc1fb50 Mon Sep 17 00:00:00 2001
From: Daniel Han <danielhanchen@gmail.com>
Date: Wed, 13 Aug 2025 05:18:55 -0700
Subject: [PATCH 009/154] Update _utils.py

---
 unsloth/models/_utils.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/unsloth/models/_utils.py b/unsloth/models/_utils.py
index 3bd3c2c294..d6eb82f01c 100644
--- a/unsloth/models/_utils.py
+++ b/unsloth/models/_utils.py
@@ -58,6 +58,7 @@
     "HAS_CUT_CROSS_ENTROPY",
     "EMPTY_LOGITS",
     "fused_linear_cross_entropy",
+    "unsloth_fused_ce_loss",
     "patch_unsloth_smart_gradient_checkpointing",
     "unpatch_unsloth_smart_gradient_checkpointing",
 
@@ -109,6 +110,7 @@
     HAS_CUT_CROSS_ENTROPY,
     fused_linear_cross_entropy,
     _unsloth_get_batch_samples,
+    unsloth_fused_ce_loss,
 )
 from unsloth_zoo.vision_utils import (
     process_vision_info,

From f451adff6be85230da2cd50bf068f23726d9b99d Mon Sep 17 00:00:00 2001
From: Daniel Han <danielhanchen@gmail.com>
Date: Wed, 13 Aug 2025 06:04:40 -0700
Subject: [PATCH 010/154] Versioning

---
 pyproject.toml           | 6 +++---
 unsloth/models/_utils.py | 2 +-
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/pyproject.toml b/pyproject.toml
index 8e18688ddf..e563ba6fc5 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -7,7 +7,7 @@ name = "unsloth"
 dynamic = ["version"]
 description = "2-5X faster LLM finetuning"
 readme = "README.md"
-requires-python = ">=3.9,<3.13"
+requires-python = ">=3.9,<=3.13"
 license = {text = "Apache-2.0"}
 keywords = ["ai", "llm",]
 authors = [
@@ -37,7 +37,7 @@ triton = [
 ]
 
 huggingface = [
-    "unsloth_zoo>=2025.8.3",
+    "unsloth_zoo>=2025.8.4",
     "packaging",
     "tyro",
     "transformers>=4.51.3,!=4.47.0,!=4.52.0,!=4.52.1,!=4.52.2,!=4.52.3,!=4.53.0",
@@ -384,7 +384,7 @@ colab-ampere-torch220 = [
     "flash-attn>=2.6.3",
 ]
 colab-new = [
-    "unsloth_zoo>=2025.8.3",
+    "unsloth_zoo>=2025.8.4",
     "packaging",
     "tyro",
     "transformers>=4.51.3,!=4.47.0,!=4.52.0,!=4.52.1,!=4.52.2,!=4.52.3,!=4.53.0",
diff --git a/unsloth/models/_utils.py b/unsloth/models/_utils.py
index d6eb82f01c..d1df57ad5c 100644
--- a/unsloth/models/_utils.py
+++ b/unsloth/models/_utils.py
@@ -12,7 +12,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-__version__ = "2025.8.4"
+__version__ = "2025.8.5"
 
 __all__ = [
     "SUPPORTS_BFLOAT16",

From 3b82c4259cd7506b351bf9b073a3033be22da8aa Mon Sep 17 00:00:00 2001
From: Daniel Han <danielhanchen@gmail.com>
Date: Thu, 14 Aug 2025 03:31:47 -0700
Subject: [PATCH 011/154] GPT OSS fix

---
 unsloth/models/loader.py | 10 ++++++++--
 1 file changed, 8 insertions(+), 2 deletions(-)

diff --git a/unsloth/models/loader.py b/unsloth/models/loader.py
index 7ac27158a2..960f9cc23f 100644
--- a/unsloth/models/loader.py
+++ b/unsloth/models/loader.py
@@ -615,12 +615,18 @@ def from_pretrained(
             os.environ["UNSLOTH_ENABLE_CCE"] = "0"
             if not load_in_4bit:
                 # Only upcast MoE biases for MXFP4, not BnB
+                # Also set down projection compute dtype to be float32
                 os.environ["UNSLOTH_FORCE_CUSTOM_DTYPE"] = \
                     "all;None;None;"\
                     "x = 'gate_up_proj_bias'\n"\
-                    "if hasattr(module, x): setattr(module, x, torch.nn.Parameter(getattr(module, x).to(torch.float32)) if isinstance(getattr(module, x), torch.nn.Parameter) else getattr(module, x).to(torch.float32))\n"\
+                    "if hasattr(module, x): "\
+                    "setattr(module, x, torch.nn.Parameter(getattr(module, x).to(torch.float32)) if isinstance(getattr(module, x), torch.nn.Parameter) else getattr(module, x).to(torch.float32))\n"\
                     "x = 'down_proj_bias'\n"\
-                    "if hasattr(module, x): setattr(module, x, torch.nn.Parameter(getattr(module, x).to(torch.float32)) if isinstance(getattr(module, x), torch.nn.Parameter) else getattr(module, x).to(torch.float32))\n;"
+                    "if hasattr(module, x): "\
+                    "setattr(module, x, torch.nn.Parameter(getattr(module, x).to(torch.float32)) if isinstance(getattr(module, x), torch.nn.Parameter) else getattr(module, x).to(torch.float32))\n"\
+                    ""\
+                    "if 'down_projs' in name and hasattr(module, 'compute_dtype'): module.compute_dtype = torch.float32\n"\
+                    ";"
         else:
             for check_model_name in DISABLE_COMPILE_MODEL_NAMES:
                 if check_model_name in lowered_model_name:

From 61366efc914563179c460c16e2e8e144fd4cb4d8 Mon Sep 17 00:00:00 2001
From: Daniel Han <danielhanchen@gmail.com>
Date: Thu, 14 Aug 2025 03:50:52 -0700
Subject: [PATCH 012/154] GPT OSS fix

---
 unsloth/models/_utils.py | 2 ++
 unsloth/models/loader.py | 4 +++-
 2 files changed, 5 insertions(+), 1 deletion(-)

diff --git a/unsloth/models/_utils.py b/unsloth/models/_utils.py
index d1df57ad5c..ab2694fde1 100644
--- a/unsloth/models/_utils.py
+++ b/unsloth/models/_utils.py
@@ -68,6 +68,7 @@
     "patch_fast_lora",
     "validate_loftq_config",
     "RaiseUninitialized",
+    "dequantize_module_weight",
 ]
 
 import torch
@@ -724,6 +725,7 @@ def prepare_model_for_kbit_training(
 # Weirdly LoraLayer.update_layer downcasts PEFT layers to float16??
 # For mixed precision, we need it to be in float32 not float16.
 from peft import __version__ as peft_version
+from peft.utils.integrations import dequantize_module_weight
 if Version(peft_version) < Version("0.12.0"):
     from peft.tuners.lora.layer import LoraLayer
     try:
diff --git a/unsloth/models/loader.py b/unsloth/models/loader.py
index 960f9cc23f..bb102376d4 100644
--- a/unsloth/models/loader.py
+++ b/unsloth/models/loader.py
@@ -625,7 +625,9 @@ def from_pretrained(
                     "if hasattr(module, x): "\
                     "setattr(module, x, torch.nn.Parameter(getattr(module, x).to(torch.float32)) if isinstance(getattr(module, x), torch.nn.Parameter) else getattr(module, x).to(torch.float32))\n"\
                     ""\
-                    "if 'down_projs' in name and hasattr(module, 'compute_dtype'): module.compute_dtype = torch.float32\n"\
+                    "if 'down_projs' in name and hasattr(module, 'compute_dtype') and "\
+                    "torch.amax(dequantize_module_weight(module)) >= 1024:"\
+                    "module.compute_dtype = torch.float32\n"\
                     ";"
         else:
             for check_model_name in DISABLE_COMPILE_MODEL_NAMES:

From de043d95684df41bf69ec8ea3c29538a9bcab1e4 Mon Sep 17 00:00:00 2001
From: Daniel Han <danielhanchen@gmail.com>
Date: Thu, 14 Aug 2025 04:28:57 -0700
Subject: [PATCH 013/154] Update loader.py

---
 unsloth/models/loader.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/unsloth/models/loader.py b/unsloth/models/loader.py
index bb102376d4..c61aab750d 100644
--- a/unsloth/models/loader.py
+++ b/unsloth/models/loader.py
@@ -627,7 +627,7 @@ def from_pretrained(
                     ""\
                     "if 'down_projs' in name and hasattr(module, 'compute_dtype') and "\
                     "torch.amax(dequantize_module_weight(module)) >= 1024:"\
-                    "module.compute_dtype = torch.float32\n"\
+                    "module._pre_set_compute_dtype = torch.float32\n"\
                     ";"
         else:
             for check_model_name in DISABLE_COMPILE_MODEL_NAMES:

From c1ef6f1a6270e24b47259856e4b229f44cbe4053 Mon Sep 17 00:00:00 2001
From: Daniel Han <danielhanchen@gmail.com>
Date: Thu, 14 Aug 2025 04:36:16 -0700
Subject: [PATCH 014/154] Update vision.py

---
 unsloth/models/vision.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/unsloth/models/vision.py b/unsloth/models/vision.py
index 5524d8f16d..0f267104f3 100644
--- a/unsloth/models/vision.py
+++ b/unsloth/models/vision.py
@@ -458,6 +458,7 @@ def from_pretrained(
         # Edit data-types
         if custom_datatype is not None:
             for jj, (name, module) in enumerate(model.named_modules()):
+                print(custom_datatype)
                 exec(custom_datatype)
             pass
         pass

From f18cd268bae43f9c531bc78a0ded608339b9b056 Mon Sep 17 00:00:00 2001
From: Daniel Han <danielhanchen@gmail.com>
Date: Thu, 14 Aug 2025 04:41:27 -0700
Subject: [PATCH 015/154] Update vision.py

---
 unsloth/models/vision.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/unsloth/models/vision.py b/unsloth/models/vision.py
index 0f267104f3..fcba556e7a 100644
--- a/unsloth/models/vision.py
+++ b/unsloth/models/vision.py
@@ -356,6 +356,7 @@ def from_pretrained(
         correct_dtype = None
         if os.environ.get("UNSLOTH_FORCE_CUSTOM_DTYPE", "") != "":
             custom_datatype = os.environ["UNSLOTH_FORCE_CUSTOM_DTYPE"]
+            print(custom_datatype)
             assert custom_datatype.count(";") >= 4
             checker, _dtype, _bnb_compute_dtype, _custom_datatype, execute_code = custom_datatype.split(";", 4)
 
@@ -371,6 +372,7 @@ def from_pretrained(
                     bnb_compute_dtype = eval(_bnb_compute_dtype)
                 correct_dtype = bnb_compute_dtype
                 custom_datatype = _custom_datatype
+                print(custom_datatype)
                 # Execute code as well
                 if len(execute_code.strip()) != 0:
                     exec(execute_code)
@@ -458,7 +460,6 @@ def from_pretrained(
         # Edit data-types
         if custom_datatype is not None:
             for jj, (name, module) in enumerate(model.named_modules()):
-                print(custom_datatype)
                 exec(custom_datatype)
             pass
         pass

From 02152243313ae76b42e4b887d7d5c1c87b0901a6 Mon Sep 17 00:00:00 2001
From: Daniel Han <danielhanchen@gmail.com>
Date: Thu, 14 Aug 2025 04:44:56 -0700
Subject: [PATCH 016/154] Update loader.py

---
 unsloth/models/loader.py | 9 +++++----
 unsloth/models/vision.py | 2 --
 2 files changed, 5 insertions(+), 6 deletions(-)

diff --git a/unsloth/models/loader.py b/unsloth/models/loader.py
index c61aab750d..d0b7d4dc4c 100644
--- a/unsloth/models/loader.py
+++ b/unsloth/models/loader.py
@@ -615,16 +615,17 @@ def from_pretrained(
             os.environ["UNSLOTH_ENABLE_CCE"] = "0"
             if not load_in_4bit:
                 # Only upcast MoE biases for MXFP4, not BnB
-                # Also set down projection compute dtype to be float32
                 os.environ["UNSLOTH_FORCE_CUSTOM_DTYPE"] = \
                     "all;None;None;"\
                     "x = 'gate_up_proj_bias'\n"\
                     "if hasattr(module, x): "\
                     "setattr(module, x, torch.nn.Parameter(getattr(module, x).to(torch.float32)) if isinstance(getattr(module, x), torch.nn.Parameter) else getattr(module, x).to(torch.float32))\n"\
                     "x = 'down_proj_bias'\n"\
-                    "if hasattr(module, x): "\
-                    "setattr(module, x, torch.nn.Parameter(getattr(module, x).to(torch.float32)) if isinstance(getattr(module, x), torch.nn.Parameter) else getattr(module, x).to(torch.float32))\n"\
-                    ""\
+                    ";"
+            else:
+                # Set down projection compute dtype to be float32 for float16 machines
+                os.environ["UNSLOTH_FORCE_CUSTOM_DTYPE"] = \
+                    "all;None;None;"\
                     "if 'down_projs' in name and hasattr(module, 'compute_dtype') and "\
                     "torch.amax(dequantize_module_weight(module)) >= 1024:"\
                     "module._pre_set_compute_dtype = torch.float32\n"\
diff --git a/unsloth/models/vision.py b/unsloth/models/vision.py
index fcba556e7a..5524d8f16d 100644
--- a/unsloth/models/vision.py
+++ b/unsloth/models/vision.py
@@ -356,7 +356,6 @@ def from_pretrained(
         correct_dtype = None
         if os.environ.get("UNSLOTH_FORCE_CUSTOM_DTYPE", "") != "":
             custom_datatype = os.environ["UNSLOTH_FORCE_CUSTOM_DTYPE"]
-            print(custom_datatype)
             assert custom_datatype.count(";") >= 4
             checker, _dtype, _bnb_compute_dtype, _custom_datatype, execute_code = custom_datatype.split(";", 4)
 
@@ -372,7 +371,6 @@ def from_pretrained(
                     bnb_compute_dtype = eval(_bnb_compute_dtype)
                 correct_dtype = bnb_compute_dtype
                 custom_datatype = _custom_datatype
-                print(custom_datatype)
                 # Execute code as well
                 if len(execute_code.strip()) != 0:
                     exec(execute_code)

From 5ed4a46e7c37e81e9db29f205ad811b061c330c1 Mon Sep 17 00:00:00 2001
From: Daniel Han <danielhanchen@gmail.com>
Date: Thu, 14 Aug 2025 17:23:46 -0700
Subject: [PATCH 017/154] Update vision.py

---
 unsloth/models/vision.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/unsloth/models/vision.py b/unsloth/models/vision.py
index 5524d8f16d..bfd0011f89 100644
--- a/unsloth/models/vision.py
+++ b/unsloth/models/vision.py
@@ -386,6 +386,7 @@ def from_pretrained(
             print(f"Unsloth: {model_type_arch.title()} does not support SDPA - switching to eager!")
             del kwargs["attn_implementation"]
         pass
+        print(supports_sdpa, kwargs)
 
         bnb_config = None
         if full_finetuning and (load_in_4bit or load_in_8bit):

From a22255811467e34ddac87e9af9879e141bb35673 Mon Sep 17 00:00:00 2001
From: Daniel Han <danielhanchen@gmail.com>
Date: Thu, 14 Aug 2025 19:22:16 -0700
Subject: [PATCH 018/154] Update vision.py

---
 unsloth/models/vision.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/unsloth/models/vision.py b/unsloth/models/vision.py
index bfd0011f89..5524d8f16d 100644
--- a/unsloth/models/vision.py
+++ b/unsloth/models/vision.py
@@ -386,7 +386,6 @@ def from_pretrained(
             print(f"Unsloth: {model_type_arch.title()} does not support SDPA - switching to eager!")
             del kwargs["attn_implementation"]
         pass
-        print(supports_sdpa, kwargs)
 
         bnb_config = None
         if full_finetuning and (load_in_4bit or load_in_8bit):

From 6cffb1cb06a7b2b5d14a3d36acc5970f1bd790a5 Mon Sep 17 00:00:00 2001
From: Daniel Han <danielhanchen@gmail.com>
Date: Fri, 15 Aug 2025 04:25:15 -0700
Subject: [PATCH 019/154] Update llama.py

---
 unsloth/models/llama.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/unsloth/models/llama.py b/unsloth/models/llama.py
index ab7f4bfdde..ae03a685eb 100644
--- a/unsloth/models/llama.py
+++ b/unsloth/models/llama.py
@@ -701,8 +701,9 @@ def LlamaModel_fast_forward(
     # Fix out of bounds tokenization
     if hasattr(self, "max_seq_length"):
         if seq_length > self.max_seq_length:
+            shape = input_ids.shape if input_ids is not None else inputs_embeds.shape
             logger.warning_once(
-                f"Unsloth: Input IDs of length {seq_length} > the model's max sequence length of {self.max_seq_length}.\n"\
+                f"Unsloth: Input IDs of shape {shape} with length {seq_length} > the model's max sequence length of {self.max_seq_length}.\n"\
                 "We shall truncate it ourselves. It's imperative if you correct this issue first."
             )
         if input_ids is not None:

From 15d33a5f0a3fed1e8fbd89acf25dda33ceefc436 Mon Sep 17 00:00:00 2001
From: Daniel Han <danielhanchen@gmail.com>
Date: Fri, 15 Aug 2025 04:34:50 -0700
Subject: [PATCH 020/154] Update llama.py

---
 unsloth/models/llama.py | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/unsloth/models/llama.py b/unsloth/models/llama.py
index ae03a685eb..badcd51a12 100644
--- a/unsloth/models/llama.py
+++ b/unsloth/models/llama.py
@@ -698,6 +698,9 @@ def LlamaModel_fast_forward(
 
     seq_length_with_past = seq_length
 
+    shape = input_ids.shape if input_ids is not None else inputs_embeds.shape
+    print(shape)
+
     # Fix out of bounds tokenization
     if hasattr(self, "max_seq_length"):
         if seq_length > self.max_seq_length:

From 95a4dafadb9c1a3b65b4b0c0643741a4b6e144eb Mon Sep 17 00:00:00 2001
From: Daniel Han <danielhanchen@gmail.com>
Date: Fri, 15 Aug 2025 04:54:45 -0700
Subject: [PATCH 021/154] Update llama.py

---
 unsloth/models/llama.py | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/unsloth/models/llama.py b/unsloth/models/llama.py
index badcd51a12..ae03a685eb 100644
--- a/unsloth/models/llama.py
+++ b/unsloth/models/llama.py
@@ -698,9 +698,6 @@ def LlamaModel_fast_forward(
 
     seq_length_with_past = seq_length
 
-    shape = input_ids.shape if input_ids is not None else inputs_embeds.shape
-    print(shape)
-
     # Fix out of bounds tokenization
     if hasattr(self, "max_seq_length"):
         if seq_length > self.max_seq_length:

From 4104bba896a760833061ece7dbbdff7423b5d141 Mon Sep 17 00:00:00 2001
From: Daniel Han <danielhanchen@gmail.com>
Date: Fri, 15 Aug 2025 04:55:34 -0700
Subject: [PATCH 022/154] Versioning

---
 pyproject.toml           | 4 ++--
 unsloth/models/_utils.py | 2 +-
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/pyproject.toml b/pyproject.toml
index e563ba6fc5..6f6f225bde 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -37,7 +37,7 @@ triton = [
 ]
 
 huggingface = [
-    "unsloth_zoo>=2025.8.4",
+    "unsloth_zoo>=2025.8.5",
     "packaging",
     "tyro",
     "transformers>=4.51.3,!=4.47.0,!=4.52.0,!=4.52.1,!=4.52.2,!=4.52.3,!=4.53.0",
@@ -384,7 +384,7 @@ colab-ampere-torch220 = [
     "flash-attn>=2.6.3",
 ]
 colab-new = [
-    "unsloth_zoo>=2025.8.4",
+    "unsloth_zoo>=2025.8.5",
     "packaging",
     "tyro",
     "transformers>=4.51.3,!=4.47.0,!=4.52.0,!=4.52.1,!=4.52.2,!=4.52.3,!=4.53.0",
diff --git a/unsloth/models/_utils.py b/unsloth/models/_utils.py
index ab2694fde1..c84fd118e7 100644
--- a/unsloth/models/_utils.py
+++ b/unsloth/models/_utils.py
@@ -12,7 +12,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-__version__ = "2025.8.5"
+__version__ = "2025.8.6"
 
 __all__ = [
     "SUPPORTS_BFLOAT16",

From 8cc1999edaee313354f76c2c232389ad3bf07f23 Mon Sep 17 00:00:00 2001
From: Daniel Han <danielhanchen@gmail.com>
Date: Fri, 15 Aug 2025 05:03:06 -0700
Subject: [PATCH 023/154] Update mapper.py

---
 unsloth/models/mapper.py | 10 ++++++++++
 1 file changed, 10 insertions(+)

diff --git a/unsloth/models/mapper.py b/unsloth/models/mapper.py
index 829fe29583..e8fc55c2bd 100644
--- a/unsloth/models/mapper.py
+++ b/unsloth/models/mapper.py
@@ -941,6 +941,16 @@
         "Qwen/Qwen3-4B-Thinking-2507",
         "unsloth/Qwen3-4B-Thinking-2507-bnb-4bit",
     ),
+    "unsloth/gemma-3-270m-it-unsloth-bnb-4bit" : (
+        "unsloth/gemma-3-270m-it",
+        "google/gemma-3-270m-it",
+        "unsloth/gemma-3-270m-it-bnb-4bit",
+    ),
+    "unsloth/gemma-3-270m-unsloth-bnb-4bit" : (
+        "unsloth/gemma-3-270m",
+        "google/gemma-3-270m",
+        "unsloth/gemma-3-270m-bnb-4bit",
+    ),
 }
 
 INT_TO_FLOAT_MAPPER  = {}

From ffda8a743c54fb648e8fef8039dfbd724d2fdce2 Mon Sep 17 00:00:00 2001
From: Daniel Han <danielhanchen@gmail.com>
Date: Fri, 15 Aug 2025 18:39:46 -0700
Subject: [PATCH 024/154] Update vision.py

---
 unsloth/models/vision.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/unsloth/models/vision.py b/unsloth/models/vision.py
index a5de457cef..a629021339 100644
--- a/unsloth/models/vision.py
+++ b/unsloth/models/vision.py
@@ -451,6 +451,7 @@ def from_pretrained(
             # attn_implementation   = attn_implementation,
             **kwargs,
         )
+        print(model.model.layers[0].input_layernorm.weight, model.model.layers[0].input_layernorm.weight.dtype)
         raise_handler.remove()
         # Return old flag
         os.environ["HF_HUB_ENABLE_HF_TRANSFER"] = old_hf_transfer

From cdf2e17aea327a652b034a9a2601fee0ae780fb5 Mon Sep 17 00:00:00 2001
From: Daniel Han <danielhanchen@gmail.com>
Date: Fri, 15 Aug 2025 18:49:30 -0700
Subject: [PATCH 025/154] Update vision.py

---
 unsloth/models/vision.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/unsloth/models/vision.py b/unsloth/models/vision.py
index a629021339..fa3bb25e12 100644
--- a/unsloth/models/vision.py
+++ b/unsloth/models/vision.py
@@ -470,6 +470,7 @@ def from_pretrained(
             if DEVICE_TYPE == "cuda":  torch.cuda.empty_cache()
             elif DEVICE_TYPE == "xpu": torch.xpu.empty_cache()
         pass
+        print(model.model.layers[0].input_layernorm.weight, model.model.layers[0].input_layernorm.weight.dtype)
 
         # Counteract saved tokenizers
         tokenizer_name = model_name if tokenizer_name is None else tokenizer_name
@@ -516,6 +517,7 @@ def from_pretrained(
         )
         model, tokenizer = patch_tokenizer(model, tokenizer)
         model = post_patch_loss_function(model)
+        print(model.model.layers[0].input_layernorm.weight, model.model.layers[0].input_layernorm.weight.dtype)
 
         # Log Unsloth version for future fastpaths for inference
         if hasattr(model, "config"):

From 941d1aeb8f6fb724ca2ca2bc6793980e0647931c Mon Sep 17 00:00:00 2001
From: Daniel Han <danielhanchen@gmail.com>
Date: Fri, 15 Aug 2025 18:52:00 -0700
Subject: [PATCH 026/154] Update vision.py

---
 unsloth/models/vision.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/unsloth/models/vision.py b/unsloth/models/vision.py
index fa3bb25e12..4dc9cc4639 100644
--- a/unsloth/models/vision.py
+++ b/unsloth/models/vision.py
@@ -507,6 +507,7 @@ def from_pretrained(
                 tokenizer.pad_token_id = __tokenizer.pad_token_id
         pass
         # Fix other stuff like BnB compute data types
+        print("do_forced_float32", do_forced_float32)
         model, tokenizer = patch_model_and_tokenizer(
             model,
             tokenizer,

From 73fa72cb69866bec70cad78855fef994eb95b916 Mon Sep 17 00:00:00 2001
From: Daniel Han <danielhanchen@gmail.com>
Date: Fri, 15 Aug 2025 19:13:27 -0700
Subject: [PATCH 027/154] Upcast norms

---
 unsloth/models/loader.py | 33 +++++++++++++++++++++++++++------
 1 file changed, 27 insertions(+), 6 deletions(-)

diff --git a/unsloth/models/loader.py b/unsloth/models/loader.py
index 59226f0f42..edd909abfe 100644
--- a/unsloth/models/loader.py
+++ b/unsloth/models/loader.py
@@ -571,8 +571,15 @@ def from_pretrained(
         elif "qwen2.5" in lowered_model_name and transformers_version < Version("4.49.0"):
             raise RuntimeError("Unsloth: Qwen 2.5 only works on transformers >= 4.49.0." + LATEST)
         # Gemma 3
-        elif "gemma-3" in lowered_model_name and transformers_version < Version("4.50.0.dev0"):
-            raise RuntimeError("Unsloth: Gemma 3 only works on transformers >= 4.50.0." + NIGHTLY)
+        elif "gemma-3" in lowered_model_name:
+            if transformers_version < Version("4.50.0.dev0"):
+                raise RuntimeError("Unsloth: Gemma 3 only works on transformers >= 4.50.0." + NIGHTLY)
+            # Set norms to float32 since anyways they get upcasted to float32
+            os.environ["UNSLOTH_FORCE_CUSTOM_DTYPE"] = \
+                "all;None;None;"\
+                "if name.endswith('norm'): "\
+                "module._pre_set_compute_dtype = torch.float32\n"\
+                ";"
         # Cohere
         elif "c4ai-command-a-03-2025" in lowered_model_name and transformers_version < Version("4.50.0.dev0"):
             raise RuntimeError("Unsloth: Cohere's Command model only works on transformers >= 4.50.0." + NIGHTLY)
@@ -582,7 +589,8 @@ def from_pretrained(
             os.environ["UNSLOTH_DISABLE_STATIC_GENERATION"] = "1" # Sesame fails
             os.environ["UNSLOTH_FORCE_CUSTOM_DTYPE"] = \
                 "all;torch.float32;torch.float16;"\
-                "if name.endswith(('_proj', 'fc1', 'fc2', 'codebook', 'head')): module.to(torch.float16);"
+                "if name.endswith(('_proj', 'fc1', 'fc2', 'codebook', 'head')): module.to(torch.float16)"\
+                ";"
         # Granite 4
         elif 'granite-4' in lowered_model_name:
             # granite-4 rms norms are stored as 16 bit, but we upcast
@@ -594,9 +602,12 @@ def from_pretrained(
         # Gemma 3N
         elif "gemma-3n" in lowered_model_name:
             os.environ["UNSLOTH_DISABLE_STATIC_GENERATION"] = "1"
+            # Set norms to float32 since anyways they get upcasted to float32
             os.environ["UNSLOTH_FORCE_CUSTOM_DTYPE"] = \
                 "float16;torch.float16;torch.float16;"\
-                "if name.endswith(('.conv')): module;"\
+                "if name.endswith('norm'): "\
+                "module._pre_set_compute_dtype = torch.float32\n"\
+                ";"\
                 "from unsloth_zoo.temporary_patches.gemma3n import patch_Gemma3nConvNormAct_forward; patch_Gemma3nConvNormAct_forward()"
             
             if transformers_version < Version("4.53.0"):
@@ -606,7 +617,8 @@ def from_pretrained(
             # since Mamba kernels error out on using lower precision
             os.environ["UNSLOTH_FORCE_CUSTOM_DTYPE"] = \
                 "float16;torch.float32;torch.float16;"\
-                "if name.endswith(('q_proj', 'k_proj', 'v_proj', 'o_proj', 'gate_proj', 'up_proj', 'down_proj', 'head')): module.to(torch.float16);"\
+                "if name.endswith(('q_proj', 'k_proj', 'v_proj', 'o_proj', 'gate_proj', 'up_proj', 'down_proj', 'head')): module.to(torch.float16)"\
+                ";"\
                 "os.environ['TRITON_F32_DEFAULT'] = 'ieee'"
         elif "gpt-oss" in lowered_model_name:
             os.environ["UNSLOTH_DISABLE_STATIC_GENERATION"] = "1"
@@ -615,22 +627,31 @@ def from_pretrained(
             os.environ["UNSLOTH_ENABLE_CCE"] = "0"
             if not load_in_4bit:
                 # Only upcast MoE biases for MXFP4, not BnB
+                # Set norms to float32 since anyways they get upcasted to float32
                 os.environ["UNSLOTH_FORCE_CUSTOM_DTYPE"] = \
                     "all;None;None;"\
                     "x = 'gate_up_proj_bias'\n"\
                     "if hasattr(module, x): "\
                     "setattr(module, x, torch.nn.Parameter(getattr(module, x).to(torch.float32)) if isinstance(getattr(module, x), torch.nn.Parameter) else getattr(module, x).to(torch.float32))\n"\
+                    ""\
                     "x = 'down_proj_bias'\n"\
                     "if hasattr(module, x): "\
                     "setattr(module, x, torch.nn.Parameter(getattr(module, x).to(torch.float32)) if isinstance(getattr(module, x), torch.nn.Parameter) else getattr(module, x).to(torch.float32))\n"\
+                    ""\
+                    "if name.endswith('norm'): "\
+                    "module._pre_set_compute_dtype = torch.float32\n"\
                     ";"
             else:
                 # Set down projection compute dtype to be float32 for float16 machines
+                # Set norms to float32 since anyways they get upcasted to float32
                 os.environ["UNSLOTH_FORCE_CUSTOM_DTYPE"] = \
                     "all;None;None;"\
-                    "if 'down_projs' in name and hasattr(module, 'compute_dtype') and "\
+                    "if 'down_projs' in name and "\
                     "torch.amax(dequantize_module_weight(module)) >= 1024:"\
                     "module._pre_set_compute_dtype = torch.float32\n"\
+                    ""\
+                    "if name.endswith('norm'): "\
+                    "module._pre_set_compute_dtype = torch.float32\n"\
                     ";"
         else:
             for check_model_name in DISABLE_COMPILE_MODEL_NAMES:

From e4bbeef2c9b56635ff20ffbaff865c26a052babc Mon Sep 17 00:00:00 2001
From: Daniel Han <danielhanchen@gmail.com>
Date: Fri, 15 Aug 2025 19:22:19 -0700
Subject: [PATCH 028/154] Update loader.py

---
 unsloth/models/loader.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/unsloth/models/loader.py b/unsloth/models/loader.py
index edd909abfe..86850b0253 100644
--- a/unsloth/models/loader.py
+++ b/unsloth/models/loader.py
@@ -646,7 +646,7 @@ def from_pretrained(
                 # Set norms to float32 since anyways they get upcasted to float32
                 os.environ["UNSLOTH_FORCE_CUSTOM_DTYPE"] = \
                     "all;None;None;"\
-                    "if 'down_projs' in name and "\
+                    "if 'down_projs' in name and hasattr(module, 'weight') and "\
                     "torch.amax(dequantize_module_weight(module)) >= 1024:"\
                     "module._pre_set_compute_dtype = torch.float32\n"\
                     ""\

From c8d00bebb323700f00742dec14b1319603db7720 Mon Sep 17 00:00:00 2001
From: Daniel Han <danielhanchen@gmail.com>
Date: Fri, 15 Aug 2025 19:25:03 -0700
Subject: [PATCH 029/154] Update vision.py

---
 unsloth/models/vision.py | 4 ----
 1 file changed, 4 deletions(-)

diff --git a/unsloth/models/vision.py b/unsloth/models/vision.py
index 4dc9cc4639..a5de457cef 100644
--- a/unsloth/models/vision.py
+++ b/unsloth/models/vision.py
@@ -451,7 +451,6 @@ def from_pretrained(
             # attn_implementation   = attn_implementation,
             **kwargs,
         )
-        print(model.model.layers[0].input_layernorm.weight, model.model.layers[0].input_layernorm.weight.dtype)
         raise_handler.remove()
         # Return old flag
         os.environ["HF_HUB_ENABLE_HF_TRANSFER"] = old_hf_transfer
@@ -470,7 +469,6 @@ def from_pretrained(
             if DEVICE_TYPE == "cuda":  torch.cuda.empty_cache()
             elif DEVICE_TYPE == "xpu": torch.xpu.empty_cache()
         pass
-        print(model.model.layers[0].input_layernorm.weight, model.model.layers[0].input_layernorm.weight.dtype)
 
         # Counteract saved tokenizers
         tokenizer_name = model_name if tokenizer_name is None else tokenizer_name
@@ -507,7 +505,6 @@ def from_pretrained(
                 tokenizer.pad_token_id = __tokenizer.pad_token_id
         pass
         # Fix other stuff like BnB compute data types
-        print("do_forced_float32", do_forced_float32)
         model, tokenizer = patch_model_and_tokenizer(
             model,
             tokenizer,
@@ -518,7 +515,6 @@ def from_pretrained(
         )
         model, tokenizer = patch_tokenizer(model, tokenizer)
         model = post_patch_loss_function(model)
-        print(model.model.layers[0].input_layernorm.weight, model.model.layers[0].input_layernorm.weight.dtype)
 
         # Log Unsloth version for future fastpaths for inference
         if hasattr(model, "config"):

From 564b6f8cd6f73bd0f064347a0d83ab236783317e Mon Sep 17 00:00:00 2001
From: Daniel Han <danielhanchen@gmail.com>
Date: Sat, 16 Aug 2025 23:10:15 -0700
Subject: [PATCH 030/154] Upcast layernorms

---
 unsloth/models/loader.py | 24 +++++++++---------------
 unsloth/models/vision.py |  6 ++++++
 2 files changed, 15 insertions(+), 15 deletions(-)

diff --git a/unsloth/models/loader.py b/unsloth/models/loader.py
index 86850b0253..e59aef1fd0 100644
--- a/unsloth/models/loader.py
+++ b/unsloth/models/loader.py
@@ -575,11 +575,7 @@ def from_pretrained(
             if transformers_version < Version("4.50.0.dev0"):
                 raise RuntimeError("Unsloth: Gemma 3 only works on transformers >= 4.50.0." + NIGHTLY)
             # Set norms to float32 since anyways they get upcasted to float32
-            os.environ["UNSLOTH_FORCE_CUSTOM_DTYPE"] = \
-                "all;None;None;"\
-                "if name.endswith('norm'): "\
-                "module._pre_set_compute_dtype = torch.float32\n"\
-                ";"
+            os.environ["UNSLOTH_HIGH_PRECISION_LAYERNORM"] = "1"
         # Cohere
         elif "c4ai-command-a-03-2025" in lowered_model_name and transformers_version < Version("4.50.0.dev0"):
             raise RuntimeError("Unsloth: Cohere's Command model only works on transformers >= 4.50.0." + NIGHTLY)
@@ -593,25 +589,25 @@ def from_pretrained(
                 ";"
         # Granite 4
         elif 'granite-4' in lowered_model_name:
-            # granite-4 rms norms are stored as 16 bit, but we upcast
-            os.environ["UNSLOTH_UPCAST_LAYERNORM"] = "1"
+            # Granite-4 rms norms are stored as 16 bit, but we upcast
+            os.environ["UNSLOTH_HIGH_PRECISION_LAYERNORM"] = "1"
             os.environ["UNSLOTH_DISABLE_STATIC_GENERATION"] = "1"
         # Olmo 2
         elif "olmo-2" in lowered_model_name and transformers_version < Version("4.50.0.dev0"):
             raise RuntimeError("Unsloth: OLMo-2 only works on transformers >= 4.50.0." + NIGHTLY)
         # Gemma 3N
         elif "gemma-3n" in lowered_model_name:
+            if transformers_version < Version("4.53.0"):
+                raise RuntimeError("Unsloth: Gemma 3N only works on transformers >= 4.53.0" + LATEST)
             os.environ["UNSLOTH_DISABLE_STATIC_GENERATION"] = "1"
-            # Set norms to float32 since anyways they get upcasted to float32
             os.environ["UNSLOTH_FORCE_CUSTOM_DTYPE"] = \
                 "float16;torch.float16;torch.float16;"\
                 "if name.endswith('norm'): "\
                 "module._pre_set_compute_dtype = torch.float32\n"\
                 ";"\
                 "from unsloth_zoo.temporary_patches.gemma3n import patch_Gemma3nConvNormAct_forward; patch_Gemma3nConvNormAct_forward()"
-            
-            if transformers_version < Version("4.53.0"):
-                raise RuntimeError("Unsloth: Gemma 3N only works on transformers >= 4.53.0" + LATEST)
+            # Set norms to float32 since anyways they get upcasted to float32
+            os.environ["UNSLOTH_HIGH_PRECISION_LAYERNORM"] = "1"
         elif "falcon-h1" in lowered_model_name:
             # Falcon must use float32 Triton ie TRITON_F32_DEFAULT = 'ieee'
             # since Mamba kernels error out on using lower precision
@@ -638,8 +634,6 @@ def from_pretrained(
                     "if hasattr(module, x): "\
                     "setattr(module, x, torch.nn.Parameter(getattr(module, x).to(torch.float32)) if isinstance(getattr(module, x), torch.nn.Parameter) else getattr(module, x).to(torch.float32))\n"\
                     ""\
-                    "if name.endswith('norm'): "\
-                    "module._pre_set_compute_dtype = torch.float32\n"\
                     ";"
             else:
                 # Set down projection compute dtype to be float32 for float16 machines
@@ -650,9 +644,9 @@ def from_pretrained(
                     "torch.amax(dequantize_module_weight(module)) >= 1024:"\
                     "module._pre_set_compute_dtype = torch.float32\n"\
                     ""\
-                    "if name.endswith('norm'): "\
-                    "module._pre_set_compute_dtype = torch.float32\n"\
                     ";"
+            # Set norms to float32 since anyways they get upcasted to float32
+            os.environ["UNSLOTH_HIGH_PRECISION_LAYERNORM"] = "1"
         else:
             for check_model_name in DISABLE_COMPILE_MODEL_NAMES:
                 if check_model_name in lowered_model_name:
diff --git a/unsloth/models/vision.py b/unsloth/models/vision.py
index a5de457cef..6790c5cd12 100644
--- a/unsloth/models/vision.py
+++ b/unsloth/models/vision.py
@@ -455,6 +455,12 @@ def from_pretrained(
         # Return old flag
         os.environ["HF_HUB_ENABLE_HF_TRANSFER"] = old_hf_transfer
 
+        # Check float32 norm weights
+        if os.environ.get("UNSLOTH_HIGH_PRECISION_LAYERNORM", "0") == "1":
+            for jj, (name, module) in enumerate(model.named_modules()):
+                if name.endswith("norm") and hasattr(module, "weight"):
+                    module._pre_set_compute_dtype = torch.float32
+        pass
         # Edit data-types
         if custom_datatype is not None:
             with torch.no_grad():

From b8a34b4a5eeeddab69320aed0097a801d7d0b1b8 Mon Sep 17 00:00:00 2001
From: Daniel Han <danielhanchen@gmail.com>
Date: Sun, 17 Aug 2025 16:45:46 -0700
Subject: [PATCH 031/154] Update llama.py

---
 unsloth/models/llama.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/unsloth/models/llama.py b/unsloth/models/llama.py
index ae03a685eb..7217c0b593 100644
--- a/unsloth/models/llama.py
+++ b/unsloth/models/llama.py
@@ -170,6 +170,7 @@ def needs_device_kw(fn) -> bool:
 
     if "cache_position" in kwargs:
         kwargs["position_ids"] = kwargs["cache_position"]
+    print(attention_mask)
     return { "input_ids" : input_ids, "attention_mask": attention_mask, **kwargs, }
 pass
 

From 509fcb5ea138a7f7d29d033399b0fd0d953499e4 Mon Sep 17 00:00:00 2001
From: Daniel Han <danielhanchen@gmail.com>
Date: Sun, 17 Aug 2025 16:55:02 -0700
Subject: [PATCH 032/154] Update llama.py

---
 unsloth/models/llama.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/unsloth/models/llama.py b/unsloth/models/llama.py
index 7217c0b593..6beb9943e8 100644
--- a/unsloth/models/llama.py
+++ b/unsloth/models/llama.py
@@ -170,7 +170,6 @@ def needs_device_kw(fn) -> bool:
 
     if "cache_position" in kwargs:
         kwargs["position_ids"] = kwargs["cache_position"]
-    print(attention_mask)
     return { "input_ids" : input_ids, "attention_mask": attention_mask, **kwargs, }
 pass
 
@@ -798,6 +797,7 @@ def LlamaModel_fast_forward(
     pass
 
     # Ignore attention_mask
+    print(attention_mask)
     if attention_mask is None:
         padding_mask = None
     elif self.training:

From 27f1a2efc64f75eade35e5322b2278bbb1b8812a Mon Sep 17 00:00:00 2001
From: Daniel Han <danielhanchen@gmail.com>
Date: Sun, 17 Aug 2025 17:38:42 -0700
Subject: [PATCH 033/154] Update llama.py

---
 unsloth/models/llama.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/unsloth/models/llama.py b/unsloth/models/llama.py
index 6beb9943e8..763d69a5b8 100644
--- a/unsloth/models/llama.py
+++ b/unsloth/models/llama.py
@@ -797,7 +797,7 @@ def LlamaModel_fast_forward(
     pass
 
     # Ignore attention_mask
-    print(attention_mask)
+    print(attention_mask, attention_mask.dtype, attention_mask.shape, attention_mask[:, :, 0])
     if attention_mask is None:
         padding_mask = None
     elif self.training:

From 931851abfdd6fea51c72eee6afdc4809fec14bc3 Mon Sep 17 00:00:00 2001
From: Daniel Han <danielhanchen@gmail.com>
Date: Sun, 17 Aug 2025 17:51:17 -0700
Subject: [PATCH 034/154] Update llama.py

---
 unsloth/models/llama.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/unsloth/models/llama.py b/unsloth/models/llama.py
index 763d69a5b8..7cb39f9c77 100644
--- a/unsloth/models/llama.py
+++ b/unsloth/models/llama.py
@@ -797,7 +797,7 @@ def LlamaModel_fast_forward(
     pass
 
     # Ignore attention_mask
-    print(attention_mask, attention_mask.dtype, attention_mask.shape, attention_mask[:, :, 0])
+    print(attention_mask, attention_mask.dtype, attention_mask.shape, attention_mask)
     if attention_mask is None:
         padding_mask = None
     elif self.training:

From 3b9057bf81aedafba9c7d30f7e3eca80486bec07 Mon Sep 17 00:00:00 2001
From: Daniel Han <danielhanchen@gmail.com>
Date: Sun, 17 Aug 2025 19:16:35 -0700
Subject: [PATCH 035/154] Update llama.py

---
 unsloth/models/llama.py | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/unsloth/models/llama.py b/unsloth/models/llama.py
index 7cb39f9c77..4100afc60e 100644
--- a/unsloth/models/llama.py
+++ b/unsloth/models/llama.py
@@ -11,7 +11,7 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-
+global final_attention_mask
 import torch
 import gc
 import math
@@ -797,7 +797,10 @@ def LlamaModel_fast_forward(
     pass
 
     # Ignore attention_mask
-    print(attention_mask, attention_mask.dtype, attention_mask.shape, attention_mask)
+    if "RAISE_ATTENTION_MASK" in os.environ:
+        global final_attention_mask
+        final_attention_mask = attention_mask
+        raise
     if attention_mask is None:
         padding_mask = None
     elif self.training:

From 3dd87bb0ccc3886611f7fe60e24ec97393c47342 Mon Sep 17 00:00:00 2001
From: Daniel Han <danielhanchen@gmail.com>
Date: Mon, 18 Aug 2025 03:10:07 -0700
Subject: [PATCH 036/154] Update llama.py

---
 unsloth/models/llama.py | 6 +-----
 1 file changed, 1 insertion(+), 5 deletions(-)

diff --git a/unsloth/models/llama.py b/unsloth/models/llama.py
index 4100afc60e..ae03a685eb 100644
--- a/unsloth/models/llama.py
+++ b/unsloth/models/llama.py
@@ -11,7 +11,7 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-global final_attention_mask
+
 import torch
 import gc
 import math
@@ -797,10 +797,6 @@ def LlamaModel_fast_forward(
     pass
 
     # Ignore attention_mask
-    if "RAISE_ATTENTION_MASK" in os.environ:
-        global final_attention_mask
-        final_attention_mask = attention_mask
-        raise
     if attention_mask is None:
         padding_mask = None
     elif self.training:

From b757faf23e7c4cdbc5eee85c39f4841fd9841450 Mon Sep 17 00:00:00 2001
From: Daniel Han <danielhanchen@gmail.com>
Date: Mon, 18 Aug 2025 05:36:47 -0700
Subject: [PATCH 037/154] Update save.py

---
 unsloth/save.py | 41 +++++++++++++++++++++++++++++++++++++----
 1 file changed, 37 insertions(+), 4 deletions(-)

diff --git a/unsloth/save.py b/unsloth/save.py
index e6d09b78fa..ef9c84e925 100644
--- a/unsloth/save.py
+++ b/unsloth/save.py
@@ -1195,6 +1195,41 @@ def save_to_gguf(
             f"--outfile {final_location} --vocab-type {vocab_type} "\
             f"--outtype {first_conversion} --concurrency {n_cpus} --pad-vocab"
     else:
+        # Fix up conversion script if possible
+        with open(convert_location, "rb") as f: converter_latest = f.read()
+        # Fix metadata
+        converter_latest = re.sub(
+            rb"(self\.metadata \= .+?\(.+?\)"\
+            rb"[\n]{1,}([\s]{4,}))",
+            rb"\1"\
+            rb"if hasattr(self.metadata, 'quantized_by'): self.metadata.quantized_by = 'Unsloth'\n"\
+            rb"\2if hasattr(self.metadata, 'repo_url'): self.metadata.repo_url = 'https://huggingface.co/unsloth'\n"\
+            rb"\2if hasattr(self.metadata, 'tags'): self.metadata.tags = ['unsloth', 'llama.cpp']\n"\
+            rb"\2",
+            converter_latest,
+        )
+
+        # Make mistral_common optional for now
+        # from x import y
+        converter_latest = re.sub(
+            rb"(from mistral_common[^\n\(]{1,})[\s]{0,}\n",
+            rb"try:\n    \1\nexcept:\n    pass\n",
+            converter_latest,
+        )
+        # from x import (y, z,)
+        converter_latest = re.sub(
+            rb"(from mistral_common[^\n\(]{1,}[\s]{0,}\(.+?\))",
+            rb"try:\n    \1\nexcept:\n    pass\n",
+            converter_latest,
+            flags = re.MULTILINE | re.DOTALL,
+        )
+
+        try:
+            # Write file
+            with open(convert_location, "wb") as file:
+                file.write(converter_latest)
+        except:
+            pass
         command = f"python {convert_location} {model_directory} "\
             f"--outfile {final_location} "\
             f"--outtype {first_conversion}"
@@ -1694,7 +1729,7 @@ def push_to_ollama_hub(username: str, model_name: str, tag: str):
         print(f"\nMODEL PUBLISHED FAILED WITH RETURN CODE {return_code}")
     else:
         print("\nMODEL PUBLISHED SUCCESSFULLY")
-
+pass
 
 def push_to_ollama(
     tokenizer,
@@ -1726,9 +1761,7 @@ def push_to_ollama(
     )
 
     print("Successfully pushed to ollama")
-
-
-
+pass
 
 
 def unsloth_save_pretrained_gguf(

From 2e86333f332204c613a2e5636b88f0e1ef34487d Mon Sep 17 00:00:00 2001
From: Daniel Han <danielhanchen@gmail.com>
Date: Mon, 18 Aug 2025 05:42:11 -0700
Subject: [PATCH 038/154] Update rl.py

---
 unsloth/models/rl.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/unsloth/models/rl.py b/unsloth/models/rl.py
index e751ef5e30..b08d4eda62 100644
--- a/unsloth/models/rl.py
+++ b/unsloth/models/rl.py
@@ -487,6 +487,8 @@ def _patch_trl_rl_trainers(trainer_file = "grpo_trainer"):
         "logging_steps"                 : 1,
         "max_seq_length"                : None,
         "num_generations"               : 8,
+        "steps_per_generation"          : 1, # Otherwise defaults to ga_steps which is wrong
+        "generation_batch_size"         : None, # Useless. If steps_per_generation set, generation_batch_size clashes
         "top_k"                         : None,
         "vllm_mode"                     : "colocate",
         "generation_kwargs"             : {},

From b01e948b8d351ce1a8ae41de55e8dc7a7648bc32 Mon Sep 17 00:00:00 2001
From: Daniel Han <danielhanchen@gmail.com>
Date: Mon, 18 Aug 2025 05:44:09 -0700
Subject: [PATCH 039/154] Update pyproject.toml

---
 pyproject.toml | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/pyproject.toml b/pyproject.toml
index 6f6f225bde..f8558a83b6 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -37,7 +37,7 @@ triton = [
 ]
 
 huggingface = [
-    "unsloth_zoo>=2025.8.5",
+    "unsloth_zoo>=2025.8.6",
     "packaging",
     "tyro",
     "transformers>=4.51.3,!=4.47.0,!=4.52.0,!=4.52.1,!=4.52.2,!=4.52.3,!=4.53.0",
@@ -384,7 +384,7 @@ colab-ampere-torch220 = [
     "flash-attn>=2.6.3",
 ]
 colab-new = [
-    "unsloth_zoo>=2025.8.5",
+    "unsloth_zoo>=2025.8.6",
     "packaging",
     "tyro",
     "transformers>=4.51.3,!=4.47.0,!=4.52.0,!=4.52.1,!=4.52.2,!=4.52.3,!=4.53.0",

From a751fd789636a36ba1edd75775946a1339689e00 Mon Sep 17 00:00:00 2001
From: Daniel Han <danielhanchen@gmail.com>
Date: Mon, 18 Aug 2025 06:07:29 -0700
Subject: [PATCH 040/154] Update rl.py

---
 unsloth/models/rl.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/unsloth/models/rl.py b/unsloth/models/rl.py
index b08d4eda62..52b1e83694 100644
--- a/unsloth/models/rl.py
+++ b/unsloth/models/rl.py
@@ -487,8 +487,8 @@ def _patch_trl_rl_trainers(trainer_file = "grpo_trainer"):
         "logging_steps"                 : 1,
         "max_seq_length"                : None,
         "num_generations"               : 8,
-        "steps_per_generation"          : 1, # Otherwise defaults to ga_steps which is wrong
-        "generation_batch_size"         : None, # Useless. If steps_per_generation set, generation_batch_size clashes
+        # "steps_per_generation"          : 1, # Otherwise defaults to ga_steps which is wrong
+        # "generation_batch_size"         : None, # Useless. If steps_per_generation set, generation_batch_size clashes
         "top_k"                         : None,
         "vllm_mode"                     : "colocate",
         "generation_kwargs"             : {},

From 3cb6eaf68bda8bb8bad74bd2087c6f1aa366d80e Mon Sep 17 00:00:00 2001
From: Daniel Han <danielhanchen@gmail.com>
Date: Mon, 18 Aug 2025 06:24:30 -0700
Subject: [PATCH 041/154] Update rl_replacements.py

---
 unsloth/models/rl_replacements.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/unsloth/models/rl_replacements.py b/unsloth/models/rl_replacements.py
index 2555f0df1f..717e6cbf11 100644
--- a/unsloth/models/rl_replacements.py
+++ b/unsloth/models/rl_replacements.py
@@ -556,7 +556,7 @@ def grpo_trainer_fix_batch_size(RLTrainer_source, RLConfig_source):
     "    per_device_train_batch_size = num_generations\n"
     return check_batch_size
 pass
-RL_CONFIG_CHANGES["grpo_trainer"].append(grpo_trainer_fix_batch_size)
+# RL_CONFIG_CHANGES["grpo_trainer"].append(grpo_trainer_fix_batch_size)
 
 
 # Add other reward function names

From de77a26c00cbc93050e103cf5060e54eac72b15c Mon Sep 17 00:00:00 2001
From: Daniel Han <danielhanchen@gmail.com>
Date: Mon, 18 Aug 2025 21:02:30 -0700
Subject: [PATCH 042/154] Update rl.py

---
 unsloth/models/rl.py | 24 ++++++++++++++++++++++--
 1 file changed, 22 insertions(+), 2 deletions(-)

diff --git a/unsloth/models/rl.py b/unsloth/models/rl.py
index 52b1e83694..4dabdee639 100644
--- a/unsloth/models/rl.py
+++ b/unsloth/models/rl.py
@@ -133,15 +133,18 @@ class Unsloth{RLConfig_name}({RLConfig_name}):
         default = -1,
         metadata = {{'help': 'Chunk size to reduce memory usage. -1 is most efficient.'}},
     )
+    {max_seq_length_pre}
     def __init__({RLConfig_arguments},
         vllm_sampling_params = None,
         unsloth_num_chunks = -1,
+        {max_seq_length_call}
         **kwargs,
     ):
 {RLConfig_extra_args}
         super().__init__({RLConfig_call_args}{RLConfig_kwargs})
         self.vllm_sampling_params = vllm_sampling_params
         self.unsloth_num_chunks = unsloth_num_chunks
+        {max_seq_length_post}
 pass
 
 {RLTrainer_extras}
@@ -266,6 +269,21 @@ def _patch_trl_rl_trainers(trainer_file = "grpo_trainer"):
         extra_args += mixed_precision
     pass
 
+    # Check if max_seq_length is NOT defined (max_length is now default)
+    if "max_seq_length" not in call_args and "max_length" in call_args:
+        max_seq_length_pre = \
+            """max_seq_length : Optional[int] = field(
+        default = None,
+        metadata = {{'help': 'Maximum sequence length to truncate to.'}},
+    )"""
+        max_seq_length_call = "max_seq_length = max_seq_length,"
+        max_seq_length_post = "self.max_seq_length = max_seq_length"
+    else:
+        max_seq_length_pre = ""
+        max_seq_length_call = ""
+        max_seq_length_post = ""
+    pass
+
     # Check if per_device_eval_batch_size (default 8) bigger than bsz
     # Also use FP16 / BF16 evaluation
     if "args" in call_args:
@@ -353,9 +371,7 @@ def _patch_trl_rl_trainers(trainer_file = "grpo_trainer"):
             "            max_length = args.max_length\n"\
             "    else:\n"\
             "        model_max_length = getattr(model, 'max_seq_length', None)\n"\
-            "        # print(model_max_length, 'mml1')\n"\
             "        if model_max_length is None: model_max_length = getattr(model, 'max_length', None)\n"\
-            "        # print(model_max_length, 'mml2')\n"\
             "        if model_max_length is not None:\n"\
             "            args.max_length = model_max_length\n"\
             "            max_length = args.max_length\n"\
@@ -666,6 +682,10 @@ def _patch_trl_rl_trainers(trainer_file = "grpo_trainer"):
         RLTrainer_post       = RLTrainer_post,
         RL_pre               = RL_pre,
 
+        max_seq_length_pre   = max_seq_length_pre,
+        max_seq_length_call  = max_seq_length_call,
+        max_seq_length_post  = max_seq_length_post,
+
         selective_log_softmax_code = selective_log_softmax_code,
     )
 

From 27ca53180d68e80818e8e40f03e85d6abd897401 Mon Sep 17 00:00:00 2001
From: Daniel Han <danielhanchen@gmail.com>
Date: Mon, 18 Aug 2025 21:08:45 -0700
Subject: [PATCH 043/154] Update rl.py

---
 unsloth/models/rl.py | 30 +++++++++++++++---------------
 1 file changed, 15 insertions(+), 15 deletions(-)

diff --git a/unsloth/models/rl.py b/unsloth/models/rl.py
index 4dabdee639..f21bcbe4db 100644
--- a/unsloth/models/rl.py
+++ b/unsloth/models/rl.py
@@ -269,21 +269,6 @@ def _patch_trl_rl_trainers(trainer_file = "grpo_trainer"):
         extra_args += mixed_precision
     pass
 
-    # Check if max_seq_length is NOT defined (max_length is now default)
-    if "max_seq_length" not in call_args and "max_length" in call_args:
-        max_seq_length_pre = \
-            """max_seq_length : Optional[int] = field(
-        default = None,
-        metadata = {{'help': 'Maximum sequence length to truncate to.'}},
-    )"""
-        max_seq_length_call = "max_seq_length = max_seq_length,"
-        max_seq_length_post = "self.max_seq_length = max_seq_length"
-    else:
-        max_seq_length_pre = ""
-        max_seq_length_call = ""
-        max_seq_length_post = ""
-    pass
-
     # Check if per_device_eval_batch_size (default 8) bigger than bsz
     # Also use FP16 / BF16 evaluation
     if "args" in call_args:
@@ -551,6 +536,21 @@ def _patch_trl_rl_trainers(trainer_file = "grpo_trainer"):
         extra_args += learning_rate_check
     pass
 
+    # Check if max_seq_length is NOT defined (max_length is now default)
+    if "max_seq_length" not in call_args and "max_length" in call_args:
+        max_seq_length_pre = \
+            """max_seq_length : Optional[int] = field(
+        default = None,
+        metadata = {{'help': 'Maximum sequence length to truncate to.'}},
+    )"""
+        max_seq_length_call = "max_seq_length = max_seq_length,"
+        max_seq_length_post = "self.max_seq_length = max_seq_length"
+    else:
+        max_seq_length_pre = ""
+        max_seq_length_call = ""
+        max_seq_length_post = ""
+    pass
+
     # Add output_dir saving
     if "output_dir" in call_args:
         # Default checks

From 6514c8ee55baf15360f5bf840dcaf6e8cf9eeb0f Mon Sep 17 00:00:00 2001
From: Daniel Han <danielhanchen@gmail.com>
Date: Mon, 18 Aug 2025 21:10:05 -0700
Subject: [PATCH 044/154] Update rl.py

---
 unsloth/models/rl.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/unsloth/models/rl.py b/unsloth/models/rl.py
index f21bcbe4db..afa6b25731 100644
--- a/unsloth/models/rl.py
+++ b/unsloth/models/rl.py
@@ -541,7 +541,7 @@ def _patch_trl_rl_trainers(trainer_file = "grpo_trainer"):
         max_seq_length_pre = \
             """max_seq_length : Optional[int] = field(
         default = None,
-        metadata = {{'help': 'Maximum sequence length to truncate to.'}},
+        metadata = {'help': 'Maximum sequence length to truncate to.'},
     )"""
         max_seq_length_call = "max_seq_length = max_seq_length,"
         max_seq_length_post = "self.max_seq_length = max_seq_length"

From 3e29ae7ca8fa2ef130a3dedce365d5c33a7d63b7 Mon Sep 17 00:00:00 2001
From: Daniel Han <danielhanchen@gmail.com>
Date: Mon, 18 Aug 2025 22:41:37 -0700
Subject: [PATCH 045/154] Update _utils.py

---
 unsloth/models/_utils.py | 32 ++++++++++++++++++++++++++++++++
 1 file changed, 32 insertions(+)

diff --git a/unsloth/models/_utils.py b/unsloth/models/_utils.py
index 749becf098..dd1798f105 100644
--- a/unsloth/models/_utils.py
+++ b/unsloth/models/_utils.py
@@ -273,6 +273,38 @@ def filter(self, x): return not (self.text in x.getMessage())
 except:
     pass
 
+# Using a slow image processor as `use_fast`
+try:
+    from transformers.processing_utils import logger as processing_utils_logger
+    processing_utils_logger.addFilter(HideLoggingMessage("`use_fast`"))
+    del processing_utils_logger
+except:
+    pass
+
+# Using a slow image processor as `use_fast`
+try:
+    from transformers.models.auto.image_processing_auto import logger as processing_utils_logger
+    processing_utils_logger.addFilter(HideLoggingMessage("`use_fast`"))
+    del processing_utils_logger
+except:
+    pass
+
+# `use_cache=True` is incompatible with gradient checkpointing
+try:
+    from transformers.trainer import logger as trainer_logger
+    trainer_logger.addFilter(HideLoggingMessage("`use_cache=True`"))
+    del trainer_logger
+except:
+    pass
+
+# `use_cache=True` is incompatible with gradient checkpointing
+try:
+    from transformers.utils.generic import logger as trainer_logger
+    trainer_logger.addFilter(HideLoggingMessage("`use_cache=True`"))
+    del trainer_logger
+except:
+    pass
+
 # Errors out on
 # Some weights of Gemma3nForConditionalGeneration were not initialized from the model checkpoint
 from transformers.modeling_utils import logger as transformers_logger

From a42f6247d09a42ce858a4ce6af733463c2eb958b Mon Sep 17 00:00:00 2001
From: Daniel Han <danielhanchen@gmail.com>
Date: Tue, 19 Aug 2025 02:33:58 -0700
Subject: [PATCH 046/154] Update __init__.py

---
 unsloth/__init__.py | 25 +++++++++++++++++++++++++
 1 file changed, 25 insertions(+)

diff --git a/unsloth/__init__.py b/unsloth/__init__.py
index 5d9ddbd43f..1055dfb3eb 100644
--- a/unsloth/__init__.py
+++ b/unsloth/__init__.py
@@ -12,6 +12,31 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
+try:
+    # Fix up AttributeError: 'MessageFactory' object has no attribute 'GetPrototype'
+    # MUST do this at the start primarily due to tensorflow causing issues
+    import google.protobuf.message_factory
+    class MessageFactory:
+        def CreatePrototype(self, *args, **kwargs): return
+        def GetMessages(self, *args, **kwargs): return
+        def GetPrototype(self, *args, **kwargs): return
+    if not hasattr(google.protobuf.message_factory, "MessageFactory"):
+        google.protobuf.message_factory.MessageFactory = MessageFactory
+    elif hasattr(google.protobuf.message_factory, "MessageFactory") and \
+        not hasattr(google.protobuf.message_factory.MessageFactory, "GetPrototype") and \
+        not hasattr(google.protobuf.message_factory, "GetMessageClass"):
+        google.protobuf.message_factory.MessageFactory = MessageFactory
+    elif hasattr(google.protobuf.message_factory, "MessageFactory") and \
+        not hasattr(google.protobuf.message_factory.MessageFactory, "GetPrototype") and \
+        hasattr(google.protobuf.message_factory, "GetMessageClass"):
+        GetMessageClass = google.protobuf.message_factory.GetMessageClass
+        def GetPrototype(self, descriptor):
+            return GetMessageClass(descriptor)
+        google.protobuf.message_factory.MessageFactory.GetPrototype = GetPrototype
+    pass
+except:
+    pass
+
 import warnings, importlib, sys
 from packaging.version import Version
 import os, re, subprocess, inspect

From 9437f9e269d28070c2ee68abd6dce087b0cb78f4 Mon Sep 17 00:00:00 2001
From: Daniel Han <danielhanchen@gmail.com>
Date: Tue, 19 Aug 2025 03:14:46 -0700
Subject: [PATCH 047/154] Torch 2.8

---
 pyproject.toml           | 112 ++++++++++++++++++++++++++++++++++++++-
 unsloth/_auto_install.py |   6 ++-
 2 files changed, 116 insertions(+), 2 deletions(-)

diff --git a/pyproject.toml b/pyproject.toml
index f8558a83b6..0462327beb 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -207,6 +207,16 @@ cu126onlytorch260 = [
     "xformers @ https://download.pytorch.org/whl/cu126/xformers-0.0.29.post3-cp311-cp311-win_amd64.whl ; python_version=='3.11' and platform_system == 'Windows'",
     "xformers @ https://download.pytorch.org/whl/cu126/xformers-0.0.29.post3-cp312-cp312-win_amd64.whl ; python_version=='3.12' and platform_system == 'Windows'",
 ]
+cu118onlytorch270 = [
+    "xformers @ https://download.pytorch.org/whl/cu118/xformers-0.0.30-cp39-cp39-manylinux_2_28_x86_64.whl ; python_version=='3.9' and platform_system == 'Linux'",
+    "xformers @ https://download.pytorch.org/whl/cu118/xformers-0.0.30-cp310-cp310-manylinux_2_28_x86_64.whl ; python_version=='3.10' and platform_system == 'Linux'",
+    "xformers @ https://download.pytorch.org/whl/cu118/xformers-0.0.30-cp311-cp311-manylinux_2_28_x86_64.whl ; python_version=='3.11' and platform_system == 'Linux'",
+    "xformers @ https://download.pytorch.org/whl/cu118/xformers-0.0.30-cp312-cp312-manylinux_2_28_x86_64.whl ; python_version=='3.12' and platform_system == 'Linux'",
+    "xformers @ https://download.pytorch.org/whl/cu118/xformers-0.0.30-cp39-cp39-win_amd64.whl ; python_version=='3.9' and platform_system == 'Windows'",
+    "xformers @ https://download.pytorch.org/whl/cu118/xformers-0.0.30-cp310-cp310-win_amd64.whl ; python_version=='3.10' and platform_system == 'Windows'",
+    "xformers @ https://download.pytorch.org/whl/cu118/xformers-0.0.30-cp311-cp311-win_amd64.whl ; python_version=='3.11' and platform_system == 'Windows'",
+    "xformers @ https://download.pytorch.org/whl/cu118/xformers-0.0.30-cp312-cp312-win_amd64.whl ; python_version=='3.12' and platform_system == 'Windows'",
+]
 cu126onlytorch270 = [
     "xformers @ https://download.pytorch.org/whl/cu126/xformers-0.0.30-cp39-cp39-manylinux_2_28_x86_64.whl ; python_version=='3.9' and platform_system == 'Linux'",
     "xformers @ https://download.pytorch.org/whl/cu126/xformers-0.0.30-cp310-cp310-manylinux_2_28_x86_64.whl ; python_version=='3.10' and platform_system == 'Linux'",
@@ -227,6 +237,30 @@ cu128onlytorch270 = [
     "xformers @ https://download.pytorch.org/whl/cu128/xformers-0.0.30-cp311-cp311-win_amd64.whl ; python_version=='3.11' and platform_system == 'Windows'",
     "xformers @ https://download.pytorch.org/whl/cu128/xformers-0.0.30-cp312-cp312-win_amd64.whl ; python_version=='3.12' and platform_system == 'Windows'",
 ]
+cu118onlytorch271 = [
+    "xformers @ https://download.pytorch.org/whl/cu118/xformers-0.0.31.post1-cp39-abi3-manylinux_2_28_x86_64.whl ; platform_system == 'Linux'",
+    "xformers @ https://download.pytorch.org/whl/cu118/xformers-0.0.31.post1-cp39-abi3-win_amd64.whl ; platform_system == 'Windows'",
+]
+cu126onlytorch271 = [
+    "xformers @ https://download.pytorch.org/whl/cu126/xformers-0.0.31.post1-cp39-abi3-manylinux_2_28_x86_64.whl ; platform_system == 'Linux'",
+    "xformers @ https://download.pytorch.org/whl/cu126/xformers-0.0.31.post1-cp39-abi3-win_amd64.whl ; platform_system == 'Windows'",
+]
+cu128onlytorch271 = [
+    "xformers @ https://download.pytorch.org/whl/cu128/xformers-0.0.31.post1-cp39-abi3-manylinux_2_28_x86_64.whl ; platform_system == 'Linux'",
+    "xformers @ https://download.pytorch.org/whl/cu128/xformers-0.0.31.post1-cp39-abi3-win_amd64.whl ; platform_system == 'Windows'",
+]
+cu118onlytorch280 = [
+    "xformers @ https://download.pytorch.org/whl/cu126/xformers-0.0.32.post2-cp39-abi3-manylinux_2_28_x86_64.whl ; platform_system == 'Linux'",
+    "xformers @ https://download.pytorch.org/whl/cu126/xformers-0.0.32.post2-cp39-abi3-win_amd64.whl ; platform_system == 'Windows'",
+]
+cu126onlytorch280 = [
+    "xformers @ https://download.pytorch.org/whl/cu128/xformers-0.0.32.post2-cp39-abi3-manylinux_2_28_x86_64.whl ; platform_system == 'Linux'",
+    "xformers @ https://download.pytorch.org/whl/cu128/xformers-0.0.32.post2-cp39-abi3-win_amd64.whl ; platform_system == 'Windows'",
+]
+cu128onlytorch280 = [
+    "xformers @ https://download.pytorch.org/whl/cu129/xformers-0.0.32.post2-cp39-abi3-manylinux_2_28_x86_64.whl ; platform_system == 'Linux'",
+    "xformers @ https://download.pytorch.org/whl/cu129/xformers-0.0.32.post2-cp39-abi3-win_amd64.whl ; platform_system == 'Windows'",
+]
 cu118 = [
     "unsloth[huggingface]",
     "bitsandbytes>=0.45.5",
@@ -337,6 +371,11 @@ cu126-torch260 = [
     "bitsandbytes>=0.45.5",
     "unsloth[cu126onlytorch260]",
 ]
+cu118-torch270 = [
+    "unsloth[huggingface]",
+    "bitsandbytes>=0.45.5",
+    "unsloth[cu118onlytorch270]",
+]
 cu126-torch270 = [
     "unsloth[huggingface]",
     "bitsandbytes>=0.45.5",
@@ -347,6 +386,36 @@ cu128-torch270 = [
     "bitsandbytes>=0.45.5",
     "unsloth[cu128onlytorch270]",
 ]
+cu118-torch271 = [
+    "unsloth[huggingface]",
+    "bitsandbytes>=0.45.5",
+    "unsloth[cu118onlytorch271]",
+]
+cu126-torch271 = [
+    "unsloth[huggingface]",
+    "bitsandbytes>=0.45.5",
+    "unsloth[cu126onlytorch271]",
+]
+cu128-torch271 = [
+    "unsloth[huggingface]",
+    "bitsandbytes>=0.45.5",
+    "unsloth[cu128onlytorch271]",
+]
+cu118-torch280 = [
+    "unsloth[huggingface]",
+    "bitsandbytes>=0.45.5",
+    "unsloth[cu118onlytorch280]",
+]
+cu126-torch280 = [
+    "unsloth[huggingface]",
+    "bitsandbytes>=0.45.5",
+    "unsloth[cu126onlytorch280]",
+]
+cu128-torch280 = [
+    "unsloth[huggingface]",
+    "bitsandbytes>=0.45.5",
+    "unsloth[cu128onlytorch280]",
+]
 kaggle = [
     "unsloth[huggingface]",
 ]
@@ -540,6 +609,12 @@ cu126-ampere-torch260 = [
     "unsloth[cu126onlytorch260]",
     "unsloth[flashattention]",
 ]
+cu118-ampere-torch270 = [
+    "unsloth[huggingface]",
+    "bitsandbytes>=0.45.5",
+    "unsloth[cu118onlytorch270]",
+    "unsloth[flashattention]",
+]
 cu126-ampere-torch270 = [
     "unsloth[huggingface]",
     "bitsandbytes>=0.45.5",
@@ -552,7 +627,42 @@ cu128-ampere-torch270 = [
     "unsloth[cu128onlytorch270]",
     "unsloth[flashattention]",
 ]
-
+cu118-ampere-torch271 = [
+    "unsloth[huggingface]",
+    "bitsandbytes>=0.45.5",
+    "unsloth[cu118onlytorch271]",
+    "unsloth[flashattention]",
+]
+cu126-ampere-torch271 = [
+    "unsloth[huggingface]",
+    "bitsandbytes>=0.45.5",
+    "unsloth[cu126onlytorch271]",
+    "unsloth[flashattention]",
+]
+cu128-ampere-torch271 = [
+    "unsloth[huggingface]",
+    "bitsandbytes>=0.45.5",
+    "unsloth[cu128onlytorch271]",
+    "unsloth[flashattention]",
+]
+cu118-ampere-torch280 = [
+    "unsloth[huggingface]",
+    "bitsandbytes>=0.45.5",
+    "unsloth[cu118onlytorch280]",
+    "unsloth[flashattention]",
+]
+cu126-ampere-torch280 = [
+    "unsloth[huggingface]",
+    "bitsandbytes>=0.45.5",
+    "unsloth[cu126onlytorch280]",
+    "unsloth[flashattention]",
+]
+cu128-ampere-torch280 = [
+    "unsloth[huggingface]",
+    "bitsandbytes>=0.45.5",
+    "unsloth[cu128onlytorch280]",
+    "unsloth[flashattention]",
+]
 flashattentiontorch260abiFALSEcu12x = [
     "flash-attn @ https://github.com/Dao-AILab/flash-attention/releases/download/v2.7.4.post1/flash_attn-2.7.4.post1+cu12torch2.6cxx11abiFALSE-cp39-cp39-linux_x86_64.whl ; platform_system == 'Linux' and python_version == '3.9'",
     "flash-attn @ https://github.com/Dao-AILab/flash-attention/releases/download/v2.7.4.post1/flash_attn-2.7.4.post1+cu12torch2.6cxx11abiFALSE-cp310-cp310-linux_x86_64.whl ; platform_system == 'Linux' and python_version == '3.10'",
diff --git a/unsloth/_auto_install.py b/unsloth/_auto_install.py
index c8559394ed..27b23ed476 100644
--- a/unsloth/_auto_install.py
+++ b/unsloth/_auto_install.py
@@ -30,7 +30,11 @@
 elif v  < V('2.5.1'): x = 'cu{}{}-torch250'
 elif v <= V('2.5.1'): x = 'cu{}{}-torch251'
 elif v  < V('2.7.0'): x = 'cu{}{}-torch260'
-elif v  < V('2.8.0'): x = 'cu{}{}-torch270'
+elif v  < V('2.7.1'): x = 'cu{}{}-torch270' # 2.7.0 only -> extras pinning xformers 0.0.30
+elif v  < V('2.8.0'): x = 'cu{}{}-torch271' # 2.7.1 up to 2.8.0 -> extras pinning xformers 0.0.31.post1
+elif v  < V('2.8.9'): x = 'cu{}{}-torch280' # 2.8.x -> extras pinning xformers 0.0.32.post2
 else: raise RuntimeError(f"Torch = {v} too new!")
+if v > V('2.6.9') and cuda not in ("11.8", "12.6", "12.8"):
+	raise RuntimeError(f"CUDA = {cuda} not supported!")
 x = x.format(cuda.replace(".", ""), "-ampere" if is_ampere else "")
 print(f'pip install --upgrade pip && pip install "unsloth[{x}] @ git+https://github.com/unslothai/unsloth.git"')
\ No newline at end of file

From 1dd99a2ebc8cf9b19d97ffffcc47bd27582f60cd Mon Sep 17 00:00:00 2001
From: Daniel Han <danielhanchen@gmail.com>
Date: Tue, 19 Aug 2025 03:16:34 -0700
Subject: [PATCH 048/154] Update rl_replacements.py

---
 unsloth/models/rl_replacements.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/unsloth/models/rl_replacements.py b/unsloth/models/rl_replacements.py
index 717e6cbf11..2555f0df1f 100644
--- a/unsloth/models/rl_replacements.py
+++ b/unsloth/models/rl_replacements.py
@@ -556,7 +556,7 @@ def grpo_trainer_fix_batch_size(RLTrainer_source, RLConfig_source):
     "    per_device_train_batch_size = num_generations\n"
     return check_batch_size
 pass
-# RL_CONFIG_CHANGES["grpo_trainer"].append(grpo_trainer_fix_batch_size)
+RL_CONFIG_CHANGES["grpo_trainer"].append(grpo_trainer_fix_batch_size)
 
 
 # Add other reward function names

From 5349cd0fa072105ab6904b5339b814eb7ed47b1e Mon Sep 17 00:00:00 2001
From: Daniel Han <danielhanchen@gmail.com>
Date: Wed, 20 Aug 2025 00:10:48 -0700
Subject: [PATCH 049/154] Update loader.py

---
 unsloth/models/loader.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/unsloth/models/loader.py b/unsloth/models/loader.py
index fae6ae0770..ce09049050 100644
--- a/unsloth/models/loader.py
+++ b/unsloth/models/loader.py
@@ -641,7 +641,7 @@ def from_pretrained(
                 os.environ["UNSLOTH_FORCE_CUSTOM_DTYPE"] = \
                     "all;None;None;"\
                     "if 'down_projs' in name and hasattr(module, 'weight') and "\
-                    "torch.amax(dequantize_module_weight(module)) >= 1024:"\
+                    "torch.amax(dequantize_module_weight(module)) >= 512:"\
                     "module._pre_set_compute_dtype = torch.float32\n"\
                     ""\
                     ";"

From 5a344c2017830ee4a8ee02e81f0383ffd8b2016f Mon Sep 17 00:00:00 2001
From: Daniel Han <danielhanchen@gmail.com>
Date: Wed, 20 Aug 2025 00:14:49 -0700
Subject: [PATCH 050/154] UNSLOTH_ENABLE_CCE

---
 unsloth/__init__.py      | 6 ++++++
 unsloth/models/loader.py | 3 ---
 2 files changed, 6 insertions(+), 3 deletions(-)

diff --git a/unsloth/__init__.py b/unsloth/__init__.py
index a43dc4f70f..c6851546b5 100644
--- a/unsloth/__init__.py
+++ b/unsloth/__init__.py
@@ -104,6 +104,12 @@ def get_device_count():
     del os.environ["PYTORCH_CUDA_ALLOC_CONF"]
 pass
 
+# CCE fails on Torch 2.8 and above
+# OutOfResources: out of resource: shared memory, Required: 98304, Hardware limit: 65536. Reducing block sizes or `num_stages`
+if (major_torch >= 2 and minor_torch >= 8) or (major_torch > 2):
+    os.environ["UNSLOTH_ENABLE_CCE"] = "0"
+pass
+
 # Fix Xformers performance issues since 0.0.25
 import importlib.util
 from pathlib import Path
diff --git a/unsloth/models/loader.py b/unsloth/models/loader.py
index ce09049050..94fd81d16d 100644
--- a/unsloth/models/loader.py
+++ b/unsloth/models/loader.py
@@ -618,9 +618,6 @@ def from_pretrained(
                 "os.environ['TRITON_F32_DEFAULT'] = 'ieee'"
         elif "gpt-oss" in lowered_model_name:
             os.environ["UNSLOTH_DISABLE_STATIC_GENERATION"] = "1"
-            # CCE fails on Tesla T4
-            # OutOfResources: out of resource: shared memory, Required: 98304, Hardware limit: 65536. Reducing block sizes or `num_stages`
-            os.environ["UNSLOTH_ENABLE_CCE"] = "0"
             if not load_in_4bit:
                 # Only upcast MoE biases for MXFP4, not BnB
                 # Set norms to float32 since anyways they get upcasted to float32

From e56363c9dcd8e7e34619261871ccf798872e0fe3 Mon Sep 17 00:00:00 2001
From: Daniel Han <danielhanchen@gmail.com>
Date: Wed, 20 Aug 2025 00:40:23 -0700
Subject: [PATCH 051/154] Fix

---
 unsloth/__init__.py      | 2 +-
 unsloth/models/loader.py | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/unsloth/__init__.py b/unsloth/__init__.py
index c6851546b5..2c72092b57 100644
--- a/unsloth/__init__.py
+++ b/unsloth/__init__.py
@@ -93,7 +93,7 @@ def get_device_count():
 
 # We support Pytorch 2
 # Fixes https://github.com/unslothai/unsloth/issues/38
-torch_version = str(torch.__version__).split(".")
+torch_version = str(re.match(r"[0-9\.]{3,}", str(torch.__version__)).group(0)).split(".")
 major_torch, minor_torch = torch_version[0], torch_version[1]
 major_torch, minor_torch = int(major_torch), int(minor_torch)
 if (major_torch < 2):
diff --git a/unsloth/models/loader.py b/unsloth/models/loader.py
index 94fd81d16d..00e942ea93 100644
--- a/unsloth/models/loader.py
+++ b/unsloth/models/loader.py
@@ -638,7 +638,7 @@ def from_pretrained(
                 os.environ["UNSLOTH_FORCE_CUSTOM_DTYPE"] = \
                     "all;None;None;"\
                     "if 'down_projs' in name and hasattr(module, 'weight') and "\
-                    "torch.amax(dequantize_module_weight(module)) >= 512:"\
+                    "torch.amax(dequantize_module_weight(module)) >= 128:"\
                     "module._pre_set_compute_dtype = torch.float32\n"\
                     ""\
                     ";"

From c79aece5377480352b1b9eb5339d175551434745 Mon Sep 17 00:00:00 2001
From: Daniel Han <danielhanchen@gmail.com>
Date: Wed, 20 Aug 2025 01:12:42 -0700
Subject: [PATCH 052/154] Update loader.py

---
 unsloth/models/loader.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/unsloth/models/loader.py b/unsloth/models/loader.py
index 00e942ea93..050e077a39 100644
--- a/unsloth/models/loader.py
+++ b/unsloth/models/loader.py
@@ -638,7 +638,7 @@ def from_pretrained(
                 os.environ["UNSLOTH_FORCE_CUSTOM_DTYPE"] = \
                     "all;None;None;"\
                     "if 'down_projs' in name and hasattr(module, 'weight') and "\
-                    "torch.amax(dequantize_module_weight(module)) >= 128:"\
+                    "torch.amax(dequantize_module_weight(module)) >= 0:"\
                     "module._pre_set_compute_dtype = torch.float32\n"\
                     ""\
                     ";"

From c4b530cc29c08693ce139f4c8decdfb80aed6370 Mon Sep 17 00:00:00 2001
From: Daniel Han <danielhanchen@gmail.com>
Date: Wed, 20 Aug 2025 01:32:44 -0700
Subject: [PATCH 053/154] Update loader.py

---
 unsloth/models/loader.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/unsloth/models/loader.py b/unsloth/models/loader.py
index 050e077a39..0ff765bf4c 100644
--- a/unsloth/models/loader.py
+++ b/unsloth/models/loader.py
@@ -638,7 +638,7 @@ def from_pretrained(
                 os.environ["UNSLOTH_FORCE_CUSTOM_DTYPE"] = \
                     "all;None;None;"\
                     "if 'down_projs' in name and hasattr(module, 'weight') and "\
-                    "torch.amax(dequantize_module_weight(module)) >= 0:"\
+                    "torch.amax(dequantize_module_weight(module)) >= 1024:"\
                     "module._pre_set_compute_dtype = torch.float32\n"\
                     ""\
                     ";"

From 0913b585eaa4d81df1ab0d2fae09f7944f5178cb Mon Sep 17 00:00:00 2001
From: Daniel Han <danielhanchen@gmail.com>
Date: Wed, 20 Aug 2025 01:47:06 -0700
Subject: [PATCH 054/154] Update __init__.py

---
 unsloth/__init__.py | 51 +++++++++++++++++++++++----------------------
 1 file changed, 26 insertions(+), 25 deletions(-)

diff --git a/unsloth/__init__.py b/unsloth/__init__.py
index 2c72092b57..3cb3c2e492 100644
--- a/unsloth/__init__.py
+++ b/unsloth/__init__.py
@@ -53,6 +53,32 @@
 # Log Unsloth is being used
 os.environ["UNSLOTH_IS_PRESENT"] = "1"
 
+# Fix up AttributeError: 'MessageFactory' object has no attribute 'GetPrototype'
+# MUST do this at the start primarily due to tensorflow causing issues
+try:
+    import google.protobuf.message_factory
+    class MessageFactory:
+        def CreatePrototype(self, *args, **kwargs): return
+        def GetMessages(self, *args, **kwargs): return
+        def GetPrototype(self, *args, **kwargs): return
+    if not hasattr(google.protobuf.message_factory, "MessageFactory"):
+        google.protobuf.message_factory.MessageFactory = MessageFactory
+    elif hasattr(google.protobuf.message_factory, "MessageFactory") and \
+        not hasattr(google.protobuf.message_factory.MessageFactory, "GetPrototype") and \
+        not hasattr(google.protobuf.message_factory, "GetMessageClass"):
+        google.protobuf.message_factory.MessageFactory = MessageFactory
+    elif hasattr(google.protobuf.message_factory, "MessageFactory") and \
+        not hasattr(google.protobuf.message_factory.MessageFactory, "GetPrototype") and \
+        hasattr(google.protobuf.message_factory, "GetMessageClass"):
+        GetMessageClass = google.protobuf.message_factory.GetMessageClass
+        def GetPrototype(self, descriptor):
+            return GetMessageClass(descriptor)
+        google.protobuf.message_factory.MessageFactory.GetPrototype = GetPrototype
+    pass
+except:
+    pass
+
+# Try importing PyTorch and check version
 try:
     import torch
 except ModuleNotFoundError:
@@ -246,31 +272,6 @@ def is_bf16_supported(): return SUPPORTS_BFLOAT16
     raise ImportError("Unsloth: Please install unsloth_zoo via `pip install unsloth_zoo`")
 pass
 
-try:
-    # Fix up AttributeError: 'MessageFactory' object has no attribute 'GetPrototype'
-    # MUST do this at the start primarily due to tensorflow causing issues
-    import google.protobuf.message_factory
-    class MessageFactory:
-        def CreatePrototype(self, *args, **kwargs): return
-        def GetMessages(self, *args, **kwargs): return
-        def GetPrototype(self, *args, **kwargs): return
-    if not hasattr(google.protobuf.message_factory, "MessageFactory"):
-        google.protobuf.message_factory.MessageFactory = MessageFactory
-    elif hasattr(google.protobuf.message_factory, "MessageFactory") and \
-        not hasattr(google.protobuf.message_factory.MessageFactory, "GetPrototype") and \
-        not hasattr(google.protobuf.message_factory, "GetMessageClass"):
-        google.protobuf.message_factory.MessageFactory = MessageFactory
-    elif hasattr(google.protobuf.message_factory, "MessageFactory") and \
-        not hasattr(google.protobuf.message_factory.MessageFactory, "GetPrototype") and \
-        hasattr(google.protobuf.message_factory, "GetMessageClass"):
-        GetMessageClass = google.protobuf.message_factory.GetMessageClass
-        def GetPrototype(self, descriptor):
-            return GetMessageClass(descriptor)
-        google.protobuf.message_factory.MessageFactory.GetPrototype = GetPrototype
-    pass
-except:
-    pass
-
 from .models import *
 from .models import __version__
 from .save import *

From 374f703ee909c56536265e1cca71306a873abd46 Mon Sep 17 00:00:00 2001
From: Daniel Han <danielhanchen@gmail.com>
Date: Wed, 20 Aug 2025 01:49:57 -0700
Subject: [PATCH 055/154] Update __init__.py

---
 unsloth/__init__.py | 50 ++++++++++++++++++++++-----------------------
 1 file changed, 25 insertions(+), 25 deletions(-)

diff --git a/unsloth/__init__.py b/unsloth/__init__.py
index 3cb3c2e492..0430e5704d 100644
--- a/unsloth/__init__.py
+++ b/unsloth/__init__.py
@@ -53,31 +53,6 @@
 # Log Unsloth is being used
 os.environ["UNSLOTH_IS_PRESENT"] = "1"
 
-# Fix up AttributeError: 'MessageFactory' object has no attribute 'GetPrototype'
-# MUST do this at the start primarily due to tensorflow causing issues
-try:
-    import google.protobuf.message_factory
-    class MessageFactory:
-        def CreatePrototype(self, *args, **kwargs): return
-        def GetMessages(self, *args, **kwargs): return
-        def GetPrototype(self, *args, **kwargs): return
-    if not hasattr(google.protobuf.message_factory, "MessageFactory"):
-        google.protobuf.message_factory.MessageFactory = MessageFactory
-    elif hasattr(google.protobuf.message_factory, "MessageFactory") and \
-        not hasattr(google.protobuf.message_factory.MessageFactory, "GetPrototype") and \
-        not hasattr(google.protobuf.message_factory, "GetMessageClass"):
-        google.protobuf.message_factory.MessageFactory = MessageFactory
-    elif hasattr(google.protobuf.message_factory, "MessageFactory") and \
-        not hasattr(google.protobuf.message_factory.MessageFactory, "GetPrototype") and \
-        hasattr(google.protobuf.message_factory, "GetMessageClass"):
-        GetMessageClass = google.protobuf.message_factory.GetMessageClass
-        def GetPrototype(self, descriptor):
-            return GetMessageClass(descriptor)
-        google.protobuf.message_factory.MessageFactory.GetPrototype = GetPrototype
-    pass
-except:
-    pass
-
 # Try importing PyTorch and check version
 try:
     import torch
@@ -136,6 +111,31 @@ def get_device_count():
     os.environ["UNSLOTH_ENABLE_CCE"] = "0"
 pass
 
+# Fix up AttributeError: 'MessageFactory' object has no attribute 'GetPrototype'
+# MUST do this at the start primarily due to tensorflow causing issues
+try:
+    import google.protobuf.message_factory
+    class MessageFactory:
+        def CreatePrototype(self, *args, **kwargs): return
+        def GetMessages(self, *args, **kwargs): return
+        def GetPrototype(self, *args, **kwargs): return
+    if not hasattr(google.protobuf.message_factory, "MessageFactory"):
+        google.protobuf.message_factory.MessageFactory = MessageFactory
+    elif hasattr(google.protobuf.message_factory, "MessageFactory") and \
+        not hasattr(google.protobuf.message_factory.MessageFactory, "GetPrototype") and \
+        not hasattr(google.protobuf.message_factory, "GetMessageClass"):
+        google.protobuf.message_factory.MessageFactory = MessageFactory
+    elif hasattr(google.protobuf.message_factory, "MessageFactory") and \
+        not hasattr(google.protobuf.message_factory.MessageFactory, "GetPrototype") and \
+        hasattr(google.protobuf.message_factory, "GetMessageClass"):
+        GetMessageClass = google.protobuf.message_factory.GetMessageClass
+        def GetPrototype(self, descriptor):
+            return GetMessageClass(descriptor)
+        google.protobuf.message_factory.MessageFactory.GetPrototype = GetPrototype
+    pass
+except:
+    pass
+
 # Fix Xformers performance issues since 0.0.25
 import importlib.util
 from pathlib import Path

From c0efbec6918a125859e10fa8c412d42e360548be Mon Sep 17 00:00:00 2001
From: Daniel Han <danielhanchen@gmail.com>
Date: Wed, 20 Aug 2025 01:51:18 -0700
Subject: [PATCH 056/154] Update __init__.py

---
 unsloth/__init__.py | 50 ++++++++++++++++++++++-----------------------
 1 file changed, 25 insertions(+), 25 deletions(-)

diff --git a/unsloth/__init__.py b/unsloth/__init__.py
index 0430e5704d..f34645651b 100644
--- a/unsloth/__init__.py
+++ b/unsloth/__init__.py
@@ -111,31 +111,6 @@ def get_device_count():
     os.environ["UNSLOTH_ENABLE_CCE"] = "0"
 pass
 
-# Fix up AttributeError: 'MessageFactory' object has no attribute 'GetPrototype'
-# MUST do this at the start primarily due to tensorflow causing issues
-try:
-    import google.protobuf.message_factory
-    class MessageFactory:
-        def CreatePrototype(self, *args, **kwargs): return
-        def GetMessages(self, *args, **kwargs): return
-        def GetPrototype(self, *args, **kwargs): return
-    if not hasattr(google.protobuf.message_factory, "MessageFactory"):
-        google.protobuf.message_factory.MessageFactory = MessageFactory
-    elif hasattr(google.protobuf.message_factory, "MessageFactory") and \
-        not hasattr(google.protobuf.message_factory.MessageFactory, "GetPrototype") and \
-        not hasattr(google.protobuf.message_factory, "GetMessageClass"):
-        google.protobuf.message_factory.MessageFactory = MessageFactory
-    elif hasattr(google.protobuf.message_factory, "MessageFactory") and \
-        not hasattr(google.protobuf.message_factory.MessageFactory, "GetPrototype") and \
-        hasattr(google.protobuf.message_factory, "GetMessageClass"):
-        GetMessageClass = google.protobuf.message_factory.GetMessageClass
-        def GetPrototype(self, descriptor):
-            return GetMessageClass(descriptor)
-        google.protobuf.message_factory.MessageFactory.GetPrototype = GetPrototype
-    pass
-except:
-    pass
-
 # Fix Xformers performance issues since 0.0.25
 import importlib.util
 from pathlib import Path
@@ -272,6 +247,31 @@ def is_bf16_supported(): return SUPPORTS_BFLOAT16
     raise ImportError("Unsloth: Please install unsloth_zoo via `pip install unsloth_zoo`")
 pass
 
+# Fix up AttributeError: 'MessageFactory' object has no attribute 'GetPrototype'
+# MUST do this at the start primarily due to tensorflow causing issues
+try:
+    import google.protobuf.message_factory
+    class MessageFactory:
+        def CreatePrototype(self, *args, **kwargs): return
+        def GetMessages(self, *args, **kwargs): return
+        def GetPrototype(self, *args, **kwargs): return
+    if not hasattr(google.protobuf.message_factory, "MessageFactory"):
+        google.protobuf.message_factory.MessageFactory = MessageFactory
+    elif hasattr(google.protobuf.message_factory, "MessageFactory") and \
+        not hasattr(google.protobuf.message_factory.MessageFactory, "GetPrototype") and \
+        not hasattr(google.protobuf.message_factory, "GetMessageClass"):
+        google.protobuf.message_factory.MessageFactory = MessageFactory
+    elif hasattr(google.protobuf.message_factory, "MessageFactory") and \
+        not hasattr(google.protobuf.message_factory.MessageFactory, "GetPrototype") and \
+        hasattr(google.protobuf.message_factory, "GetMessageClass"):
+        GetMessageClass = google.protobuf.message_factory.GetMessageClass
+        def GetPrototype(self, descriptor):
+            return GetMessageClass(descriptor)
+        google.protobuf.message_factory.MessageFactory.GetPrototype = GetPrototype
+    pass
+except:
+    pass
+
 from .models import *
 from .models import __version__
 from .save import *

From 761a4454a95b3ff9a6bc28c2f4ed5619df9b828f Mon Sep 17 00:00:00 2001
From: Daniel Han <danielhanchen@gmail.com>
Date: Wed, 20 Aug 2025 01:53:53 -0700
Subject: [PATCH 057/154] Update __init__.py

---
 unsloth/__init__.py | 42 +++++++++++++++++++++---------------------
 1 file changed, 21 insertions(+), 21 deletions(-)

diff --git a/unsloth/__init__.py b/unsloth/__init__.py
index f34645651b..95035b91b0 100644
--- a/unsloth/__init__.py
+++ b/unsloth/__init__.py
@@ -226,27 +226,6 @@ def is_bf16_supported(): return SUPPORTS_BFLOAT16
     # TODO: check triton for intel installed properly.
     pass
 
-# Check for unsloth_zoo
-try:
-    unsloth_zoo_version = importlib_version("unsloth_zoo")
-    if Version(unsloth_zoo_version) < Version("2025.8.1"):
-        print(
-            "Unsloth: Please update Unsloth and Unsloth-Zoo to the latest version!\n"\
-            "Do this via `pip install --upgrade --force-reinstall --no-cache-dir --no-deps unsloth unsloth_zoo`"
-        )
-        # if os.environ.get("UNSLOTH_DISABLE_AUTO_UPDATES", "0") == "0":
-        #     try:
-        #         os.system("pip install --upgrade --no-cache-dir --no-deps unsloth_zoo")
-        #     except:
-        #         try:
-        #             os.system("pip install --upgrade --no-cache-dir --no-deps --user unsloth_zoo")
-        #         except:
-        #             raise ImportError("Unsloth: Please update unsloth_zoo via `pip install --upgrade --no-cache-dir --no-deps unsloth_zoo`")
-    import unsloth_zoo
-except:
-    raise ImportError("Unsloth: Please install unsloth_zoo via `pip install unsloth_zoo`")
-pass
-
 # Fix up AttributeError: 'MessageFactory' object has no attribute 'GetPrototype'
 # MUST do this at the start primarily due to tensorflow causing issues
 try:
@@ -272,6 +251,27 @@ def GetPrototype(self, descriptor):
 except:
     pass
 
+# Check for unsloth_zoo
+try:
+    unsloth_zoo_version = importlib_version("unsloth_zoo")
+    if Version(unsloth_zoo_version) < Version("2025.8.1"):
+        print(
+            "Unsloth: Please update Unsloth and Unsloth-Zoo to the latest version!\n"\
+            "Do this via `pip install --upgrade --force-reinstall --no-cache-dir --no-deps unsloth unsloth_zoo`"
+        )
+        # if os.environ.get("UNSLOTH_DISABLE_AUTO_UPDATES", "0") == "0":
+        #     try:
+        #         os.system("pip install --upgrade --no-cache-dir --no-deps unsloth_zoo")
+        #     except:
+        #         try:
+        #             os.system("pip install --upgrade --no-cache-dir --no-deps --user unsloth_zoo")
+        #         except:
+        #             raise ImportError("Unsloth: Please update unsloth_zoo via `pip install --upgrade --no-cache-dir --no-deps unsloth_zoo`")
+    import unsloth_zoo
+except:
+    raise ImportError("Unsloth: Please install unsloth_zoo via `pip install unsloth_zoo`")
+pass
+
 from .models import *
 from .models import __version__
 from .save import *

From 30ea44c17f2b4e60b77240c1cb1ec93610c57861 Mon Sep 17 00:00:00 2001
From: Daniel Han <danielhanchen@gmail.com>
Date: Wed, 20 Aug 2025 01:56:58 -0700
Subject: [PATCH 058/154] Import fixes

---
 unsloth/__init__.py     | 30 ++++--------------------------
 unsloth/import_fixes.py | 40 ++++++++++++++++++++++++++++++++++++++++
 2 files changed, 44 insertions(+), 26 deletions(-)
 create mode 100644 unsloth/import_fixes.py

diff --git a/unsloth/__init__.py b/unsloth/__init__.py
index 95035b91b0..fd6bd7d499 100644
--- a/unsloth/__init__.py
+++ b/unsloth/__init__.py
@@ -17,6 +17,10 @@
 import os, re, subprocess, inspect
 import numpy as np
 
+# Fix some issues before importing other packages
+from .import_fixes import fix_message_factory_issue
+fix_message_factory_issue(); del fix_message_factory_issue;
+
 # Check if modules that need patching are already imported
 critical_modules = ['trl', 'transformers', 'peft']
 already_imported = [mod for mod in critical_modules if mod in sys.modules]
@@ -161,7 +165,6 @@ def is_bf16_supported(): return SUPPORTS_BFLOAT16
     SUPPORTS_BFLOAT16 = torch.xpu.is_bf16_supported()
 pass
 
-
 # For Gradio HF Spaces?
 # if "SPACE_AUTHOR_NAME" not in os.environ and "SPACE_REPO_NAME" not in os.environ:
 import triton
@@ -226,31 +229,6 @@ def is_bf16_supported(): return SUPPORTS_BFLOAT16
     # TODO: check triton for intel installed properly.
     pass
 
-# Fix up AttributeError: 'MessageFactory' object has no attribute 'GetPrototype'
-# MUST do this at the start primarily due to tensorflow causing issues
-try:
-    import google.protobuf.message_factory
-    class MessageFactory:
-        def CreatePrototype(self, *args, **kwargs): return
-        def GetMessages(self, *args, **kwargs): return
-        def GetPrototype(self, *args, **kwargs): return
-    if not hasattr(google.protobuf.message_factory, "MessageFactory"):
-        google.protobuf.message_factory.MessageFactory = MessageFactory
-    elif hasattr(google.protobuf.message_factory, "MessageFactory") and \
-        not hasattr(google.protobuf.message_factory.MessageFactory, "GetPrototype") and \
-        not hasattr(google.protobuf.message_factory, "GetMessageClass"):
-        google.protobuf.message_factory.MessageFactory = MessageFactory
-    elif hasattr(google.protobuf.message_factory, "MessageFactory") and \
-        not hasattr(google.protobuf.message_factory.MessageFactory, "GetPrototype") and \
-        hasattr(google.protobuf.message_factory, "GetMessageClass"):
-        GetMessageClass = google.protobuf.message_factory.GetMessageClass
-        def GetPrototype(self, descriptor):
-            return GetMessageClass(descriptor)
-        google.protobuf.message_factory.MessageFactory.GetPrototype = GetPrototype
-    pass
-except:
-    pass
-
 # Check for unsloth_zoo
 try:
     unsloth_zoo_version = importlib_version("unsloth_zoo")
diff --git a/unsloth/import_fixes.py b/unsloth/import_fixes.py
new file mode 100644
index 0000000000..d265a09df0
--- /dev/null
+++ b/unsloth/import_fixes.py
@@ -0,0 +1,40 @@
+# Copyright 2023-present Daniel Han-Chen & the Unsloth team. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+def fix_message_factory_issue():
+    # Fix up AttributeError: 'MessageFactory' object has no attribute 'GetPrototype'
+    # MUST do this at the start primarily due to tensorflow causing issues
+    try:
+        import google.protobuf.message_factory
+        class MessageFactory:
+            def CreatePrototype(self, *args, **kwargs): return
+            def GetMessages(self, *args, **kwargs): return
+            def GetPrototype(self, *args, **kwargs): return
+        if not hasattr(google.protobuf.message_factory, "MessageFactory"):
+            google.protobuf.message_factory.MessageFactory = MessageFactory
+        elif hasattr(google.protobuf.message_factory, "MessageFactory") and \
+            not hasattr(google.protobuf.message_factory.MessageFactory, "GetPrototype") and \
+            not hasattr(google.protobuf.message_factory, "GetMessageClass"):
+            google.protobuf.message_factory.MessageFactory = MessageFactory
+        elif hasattr(google.protobuf.message_factory, "MessageFactory") and \
+            not hasattr(google.protobuf.message_factory.MessageFactory, "GetPrototype") and \
+            hasattr(google.protobuf.message_factory, "GetMessageClass"):
+            GetMessageClass = google.protobuf.message_factory.GetMessageClass
+            def GetPrototype(self, descriptor):
+                return GetMessageClass(descriptor)
+            google.protobuf.message_factory.MessageFactory.GetPrototype = GetPrototype
+        pass
+    except:
+        pass
+pass

From c45467cfd91d5d66308f5cbc8a6ab3cc90bec5d5 Mon Sep 17 00:00:00 2001
From: Daniel Han <danielhanchen@gmail.com>
Date: Wed, 20 Aug 2025 02:00:51 -0700
Subject: [PATCH 059/154] Update loader.py

---
 unsloth/models/loader.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/unsloth/models/loader.py b/unsloth/models/loader.py
index 0ff765bf4c..72655782f9 100644
--- a/unsloth/models/loader.py
+++ b/unsloth/models/loader.py
@@ -637,8 +637,8 @@ def from_pretrained(
                 # Set norms to float32 since anyways they get upcasted to float32
                 os.environ["UNSLOTH_FORCE_CUSTOM_DTYPE"] = \
                     "all;None;None;"\
-                    "if 'down_projs' in name and hasattr(module, 'weight') and "\
-                    "torch.amax(dequantize_module_weight(module)) >= 1024:"\
+                    "if hasattr(module, 'weight') and "\
+                    "torch.amax(dequantize_module_weight(module)) >= 1:"\
                     "module._pre_set_compute_dtype = torch.float32\n"\
                     ""\
                     ";"

From 55e4c78a943a52b9e0b46b29afae0f79e371573c Mon Sep 17 00:00:00 2001
From: Daniel Han <danielhanchen@gmail.com>
Date: Wed, 20 Aug 2025 02:15:10 -0700
Subject: [PATCH 060/154] Fix aimv2 issue

---
 unsloth/__init__.py     | 30 +++-------------
 unsloth/import_fixes.py | 79 +++++++++++++++++++++++++++++++++++++++--
 2 files changed, 82 insertions(+), 27 deletions(-)

diff --git a/unsloth/__init__.py b/unsloth/__init__.py
index fd6bd7d499..335db48775 100644
--- a/unsloth/__init__.py
+++ b/unsloth/__init__.py
@@ -115,35 +115,15 @@ def get_device_count():
     os.environ["UNSLOTH_ENABLE_CCE"] = "0"
 pass
 
-# Fix Xformers performance issues since 0.0.25
+# Fix other issues
 import importlib.util
 from pathlib import Path
 from importlib.metadata import version as importlib_version
 from packaging.version import Version
-try:
-    xformers_version = importlib_version("xformers")
-    if Version(xformers_version) < Version("0.0.29"):
-        xformers_location = importlib.util.find_spec("xformers").origin
-        xformers_location = os.path.split(xformers_location)[0]
-        cutlass = Path(xformers_location) / "ops" / "fmha" / "cutlass.py"
-
-        if cutlass.exists():
-            with open(cutlass, "r+", encoding = "utf-8") as f:
-                text = f.read()
-                # See https://github.com/facebookresearch/xformers/issues/1176#issuecomment-2545829591
-                if "num_splits_key=-1," in text:
-                    text = text.replace("num_splits_key=-1,", "num_splits_key=None,")
-                    f.seek(0)
-                    f.write(text)
-                    f.truncate()
-                    print("Unsloth: Patching Xformers to fix some performance issues.")
-                pass
-            pass
-        pass
-    pass
-except:
-    pass
-pass
+from .import_fixes import fix_xformers_performance_issue
+fix_xformers_performance_issue(); del fix_xformers_performance_issue;
+from .import_fixes import fix_vllm_aimv2_issue
+fix_vllm_aimv2_issue(); del fix_vllm_aimv2_issue;
 
 # Torch 2.4 has including_emulation
 if DEVICE_TYPE == "cuda":
diff --git a/unsloth/import_fixes.py b/unsloth/import_fixes.py
index d265a09df0..126aac6365 100644
--- a/unsloth/import_fixes.py
+++ b/unsloth/import_fixes.py
@@ -12,9 +12,16 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
+import os
+import importlib.util
+from pathlib import Path
+from importlib.metadata import version as importlib_version
+from packaging.version import Version
+UNSLOTH_ENABLE_LOGGING = os.environ.get("UNSLOTH_ENABLE_LOGGING", "0") == "1"
+
+# Fix up AttributeError: 'MessageFactory' object has no attribute 'GetPrototype'
+# MUST do this at the start primarily due to tensorflow causing issues
 def fix_message_factory_issue():
-    # Fix up AttributeError: 'MessageFactory' object has no attribute 'GetPrototype'
-    # MUST do this at the start primarily due to tensorflow causing issues
     try:
         import google.protobuf.message_factory
         class MessageFactory:
@@ -22,11 +29,15 @@ def CreatePrototype(self, *args, **kwargs): return
             def GetMessages(self, *args, **kwargs): return
             def GetPrototype(self, *args, **kwargs): return
         if not hasattr(google.protobuf.message_factory, "MessageFactory"):
+            if UNSLOTH_ENABLE_LOGGING:
+                print("Unsloth: Patching protobuf.MessageFactory as it doesn't exist")
             google.protobuf.message_factory.MessageFactory = MessageFactory
         elif hasattr(google.protobuf.message_factory, "MessageFactory") and \
             not hasattr(google.protobuf.message_factory.MessageFactory, "GetPrototype") and \
             not hasattr(google.protobuf.message_factory, "GetMessageClass"):
             google.protobuf.message_factory.MessageFactory = MessageFactory
+            if UNSLOTH_ENABLE_LOGGING:
+                print("Unsloth: Patching protobuf.MessageFactory as it doesn't exist")
         elif hasattr(google.protobuf.message_factory, "MessageFactory") and \
             not hasattr(google.protobuf.message_factory.MessageFactory, "GetPrototype") and \
             hasattr(google.protobuf.message_factory, "GetMessageClass"):
@@ -34,7 +45,71 @@ def GetPrototype(self, *args, **kwargs): return
             def GetPrototype(self, descriptor):
                 return GetMessageClass(descriptor)
             google.protobuf.message_factory.MessageFactory.GetPrototype = GetPrototype
+            if UNSLOTH_ENABLE_LOGGING:
+                print("Unsloth: Patching protobuf.MessageFactory.GetPrototype")
         pass
     except:
         pass
 pass
+
+# Fix Xformers performance issues since 0.0.25
+def fix_xformers_performance_issue():
+    xformers_version = importlib_version("xformers")
+    if Version(xformers_version) < Version("0.0.29"):
+        xformers_location = importlib.util.find_spec("xformers").origin
+        xformers_location = os.path.split(xformers_location)[0]
+        cutlass = Path(xformers_location) / "ops" / "fmha" / "cutlass.py"
+        try:
+            if cutlass.exists():
+                with open(cutlass, "r+", encoding = "utf-8") as f:
+                    text = f.read()
+                    # See https://github.com/facebookresearch/xformers/issues/1176#issuecomment-2545829591
+                    if "num_splits_key=-1," in text:
+                        text = text.replace(
+                            "num_splits_key=-1,",
+                            "num_splits_key=None,",
+                        )
+                        f.seek(0)
+                        f.write(text)
+                        f.truncate()
+                        if UNSLOTH_ENABLE_LOGGING:
+                            print("Unsloth: Patching Xformers to fix some performance issues.")
+        except:
+            pass
+pass
+
+# ValueError: 'aimv2' is already used by a Transformers config, pick another name.
+def fix_vllm_aimv2_issue():
+    vllm_version = importlib_version("vllm")
+    if Version(vllm_version) < Version("0.10.1"):
+        vllm_version = importlib.util.find_spec("xformers").origin
+        vllm_version = os.path.split(vllm_version)[0]
+        ovis_config = Path(vllm_version) / "transformers_utils" / "configs" / "ovis.py"
+        try:
+            if ovis_config.exists():
+                with open(ovis_config, "r+", encoding = "utf-8") as f:
+                    text = f.read()
+                    # See https://github.com/vllm-project/vllm-ascend/issues/2046
+                    if 'AutoConfig.register("aimv2", AIMv2Config)' in text:
+                        text = text.replace(
+                            'AutoConfig.register("aimv2", AIMv2Config)',
+                            '',
+                        )
+                        text = text.replace(
+                            '''backbone_config.pop('model_type')
+                backbone_config = AutoConfig.for_model(model_type,
+                                                       **backbone_config)''',
+                            '''if model_type != "aimv2":
+                    backbone_config.pop('model_type')
+                    backbone_config = AutoConfig.for_model(model_type, **backbone_config)
+                else:
+                    backbone_config = AIMv2Config(**backbone_config)'''
+                        )
+                        f.seek(0)
+                        f.write(text)
+                        f.truncate()
+                        if UNSLOTH_ENABLE_LOGGING:
+                            print("Unsloth: Patching vLLM to fix `'aimv2' is already used by a Transformers config, pick another name.`")
+        except:
+            pass
+pass

From a160e42ad8250f40b25e72e2a1b2e2d550986a65 Mon Sep 17 00:00:00 2001
From: Daniel Han <danielhanchen@gmail.com>
Date: Wed, 20 Aug 2025 02:20:31 -0700
Subject: [PATCH 061/154] Update loader.py

---
 unsloth/models/loader.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/unsloth/models/loader.py b/unsloth/models/loader.py
index 72655782f9..0ff765bf4c 100644
--- a/unsloth/models/loader.py
+++ b/unsloth/models/loader.py
@@ -637,8 +637,8 @@ def from_pretrained(
                 # Set norms to float32 since anyways they get upcasted to float32
                 os.environ["UNSLOTH_FORCE_CUSTOM_DTYPE"] = \
                     "all;None;None;"\
-                    "if hasattr(module, 'weight') and "\
-                    "torch.amax(dequantize_module_weight(module)) >= 1:"\
+                    "if 'down_projs' in name and hasattr(module, 'weight') and "\
+                    "torch.amax(dequantize_module_weight(module)) >= 1024:"\
                     "module._pre_set_compute_dtype = torch.float32\n"\
                     ""\
                     ";"

From 675c4effe78a3ef5bb3f21f6892f3edc54e1e935 Mon Sep 17 00:00:00 2001
From: Daniel Han <danielhanchen@gmail.com>
Date: Wed, 20 Aug 2025 02:23:21 -0700
Subject: [PATCH 062/154] Update import_fixes.py

---
 unsloth/import_fixes.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/unsloth/import_fixes.py b/unsloth/import_fixes.py
index 126aac6365..1a4172e01f 100644
--- a/unsloth/import_fixes.py
+++ b/unsloth/import_fixes.py
@@ -54,6 +54,7 @@ def GetPrototype(self, descriptor):
 
 # Fix Xformers performance issues since 0.0.25
 def fix_xformers_performance_issue():
+    if importlib.util.find_spec("xformers") is None: return
     xformers_version = importlib_version("xformers")
     if Version(xformers_version) < Version("0.0.29"):
         xformers_location = importlib.util.find_spec("xformers").origin
@@ -80,6 +81,7 @@ def fix_xformers_performance_issue():
 
 # ValueError: 'aimv2' is already used by a Transformers config, pick another name.
 def fix_vllm_aimv2_issue():
+    if importlib.util.find_spec("vllm") is None: return
     vllm_version = importlib_version("vllm")
     if Version(vllm_version) < Version("0.10.1"):
         vllm_version = importlib.util.find_spec("xformers").origin

From a99d6b273c59f0908385559ba2d8b441751b6249 Mon Sep 17 00:00:00 2001
From: Daniel Han <danielhanchen@gmail.com>
Date: Wed, 20 Aug 2025 02:26:23 -0700
Subject: [PATCH 063/154] Update import_fixes.py

---
 unsloth/import_fixes.py | 12 +++++++-----
 1 file changed, 7 insertions(+), 5 deletions(-)

diff --git a/unsloth/import_fixes.py b/unsloth/import_fixes.py
index 1a4172e01f..a07f9970f8 100644
--- a/unsloth/import_fixes.py
+++ b/unsloth/import_fixes.py
@@ -75,8 +75,9 @@ def fix_xformers_performance_issue():
                         f.truncate()
                         if UNSLOTH_ENABLE_LOGGING:
                             print("Unsloth: Patching Xformers to fix some performance issues.")
-        except:
-            pass
+        except Exception as e:
+            if UNSLOTH_ENABLE_LOGGING:
+                print(f"Unsloth: Failed patching Xformers with error = {str(e)}")
 pass
 
 # ValueError: 'aimv2' is already used by a Transformers config, pick another name.
@@ -84,7 +85,7 @@ def fix_vllm_aimv2_issue():
     if importlib.util.find_spec("vllm") is None: return
     vllm_version = importlib_version("vllm")
     if Version(vllm_version) < Version("0.10.1"):
-        vllm_version = importlib.util.find_spec("xformers").origin
+        vllm_version = importlib.util.find_spec("vllm").origin
         vllm_version = os.path.split(vllm_version)[0]
         ovis_config = Path(vllm_version) / "transformers_utils" / "configs" / "ovis.py"
         try:
@@ -112,6 +113,7 @@ def fix_vllm_aimv2_issue():
                         f.truncate()
                         if UNSLOTH_ENABLE_LOGGING:
                             print("Unsloth: Patching vLLM to fix `'aimv2' is already used by a Transformers config, pick another name.`")
-        except:
-            pass
+        except Exception as e:
+            if UNSLOTH_ENABLE_LOGGING:
+                print(f"Unsloth: Failed patching vLLM with error = {str(e)}")
 pass

From 7e8262303ef06bc39367a17acf0e783abb37c1b4 Mon Sep 17 00:00:00 2001
From: Daniel Han <danielhanchen@gmail.com>
Date: Wed, 20 Aug 2025 02:38:39 -0700
Subject: [PATCH 064/154] Update loader.py

---
 unsloth/models/loader.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/unsloth/models/loader.py b/unsloth/models/loader.py
index 0ff765bf4c..050e077a39 100644
--- a/unsloth/models/loader.py
+++ b/unsloth/models/loader.py
@@ -638,7 +638,7 @@ def from_pretrained(
                 os.environ["UNSLOTH_FORCE_CUSTOM_DTYPE"] = \
                     "all;None;None;"\
                     "if 'down_projs' in name and hasattr(module, 'weight') and "\
-                    "torch.amax(dequantize_module_weight(module)) >= 1024:"\
+                    "torch.amax(dequantize_module_weight(module)) >= 0:"\
                     "module._pre_set_compute_dtype = torch.float32\n"\
                     ""\
                     ";"

From 0e678d6fe9ef0aeced0380184bfb9e7c9b1a1778 Mon Sep 17 00:00:00 2001
From: Daniel Han <danielhanchen@gmail.com>
Date: Wed, 20 Aug 2025 03:38:26 -0700
Subject: [PATCH 065/154] Update loader.py

---
 unsloth/models/loader.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/unsloth/models/loader.py b/unsloth/models/loader.py
index 050e077a39..1b110ca513 100644
--- a/unsloth/models/loader.py
+++ b/unsloth/models/loader.py
@@ -637,7 +637,7 @@ def from_pretrained(
                 # Set norms to float32 since anyways they get upcasted to float32
                 os.environ["UNSLOTH_FORCE_CUSTOM_DTYPE"] = \
                     "all;None;None;"\
-                    "if 'down_projs' in name and hasattr(module, 'weight') and "\
+                    "if ('down_projs' in name or 'gate_up_projs' in name) and hasattr(module, 'weight') and "\
                     "torch.amax(dequantize_module_weight(module)) >= 0:"\
                     "module._pre_set_compute_dtype = torch.float32\n"\
                     ""\

From 9b82317a699779d8b96e986fe8ef7a3f16494247 Mon Sep 17 00:00:00 2001
From: Daniel Han <danielhanchen@gmail.com>
Date: Wed, 20 Aug 2025 04:09:24 -0700
Subject: [PATCH 066/154] Update loader.py

---
 unsloth/models/loader.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/unsloth/models/loader.py b/unsloth/models/loader.py
index 1b110ca513..0da6b83d12 100644
--- a/unsloth/models/loader.py
+++ b/unsloth/models/loader.py
@@ -637,8 +637,8 @@ def from_pretrained(
                 # Set norms to float32 since anyways they get upcasted to float32
                 os.environ["UNSLOTH_FORCE_CUSTOM_DTYPE"] = \
                     "all;None;None;"\
-                    "if ('down_projs' in name or 'gate_up_projs' in name) and hasattr(module, 'weight') and "\
-                    "torch.amax(dequantize_module_weight(module)) >= 0:"\
+                    "if ('down_projs' in name) and hasattr(module, 'weight') and "\
+                    "torch.amax(dequantize_module_weight(module)) >= 1024:"\
                     "module._pre_set_compute_dtype = torch.float32\n"\
                     ""\
                     ";"

From 8a76fd32bdf05d3e63dd6df309b52d861e11ef3f Mon Sep 17 00:00:00 2001
From: Daniel Han <danielhanchen@gmail.com>
Date: Wed, 20 Aug 2025 04:39:30 -0700
Subject: [PATCH 067/154] Upgrade

---
 pyproject.toml           | 4 ++--
 unsloth/__init__.py      | 2 +-
 unsloth/models/_utils.py | 2 +-
 3 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/pyproject.toml b/pyproject.toml
index c4c3ebe6f5..83b75b0a00 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -37,7 +37,7 @@ triton = [
 ]
 
 huggingface = [
-    "unsloth_zoo>=2025.8.7",
+    "unsloth_zoo>=2025.8.8",
     "packaging",
     "tyro",
     "transformers>=4.51.3,!=4.47.0,!=4.52.0,!=4.52.1,!=4.52.2,!=4.52.3,!=4.53.0,!=4.54.0,!=4.55.0,!=4.55.1",
@@ -453,7 +453,7 @@ colab-ampere-torch220 = [
     "flash-attn>=2.6.3",
 ]
 colab-new = [
-    "unsloth_zoo>=2025.8.7",
+    "unsloth_zoo>=2025.8.8",
     "packaging",
     "tyro",
     "transformers>=4.51.3,!=4.47.0,!=4.52.0,!=4.52.1,!=4.52.2,!=4.52.3,!=4.53.0,!=4.54.0,!=4.55.0,!=4.55.1",
diff --git a/unsloth/__init__.py b/unsloth/__init__.py
index 335db48775..a6ea8f4c9f 100644
--- a/unsloth/__init__.py
+++ b/unsloth/__init__.py
@@ -212,7 +212,7 @@ def is_bf16_supported(): return SUPPORTS_BFLOAT16
 # Check for unsloth_zoo
 try:
     unsloth_zoo_version = importlib_version("unsloth_zoo")
-    if Version(unsloth_zoo_version) < Version("2025.8.1"):
+    if Version(unsloth_zoo_version) < Version("2025.8.8"):
         print(
             "Unsloth: Please update Unsloth and Unsloth-Zoo to the latest version!\n"\
             "Do this via `pip install --upgrade --force-reinstall --no-cache-dir --no-deps unsloth unsloth_zoo`"
diff --git a/unsloth/models/_utils.py b/unsloth/models/_utils.py
index 85f1a9a960..fde776a5e6 100644
--- a/unsloth/models/_utils.py
+++ b/unsloth/models/_utils.py
@@ -12,7 +12,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-__version__ = "2025.8.8"
+__version__ = "2025.8.9"
 
 __all__ = [
     "SUPPORTS_BFLOAT16",

From 94bcb28818558f7de378ef4356b5ac6651e545fa Mon Sep 17 00:00:00 2001
From: Daniel Han <danielhanchen@gmail.com>
Date: Wed, 20 Aug 2025 04:40:24 -0700
Subject: [PATCH 068/154] Update loader.py

---
 unsloth/models/loader.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/unsloth/models/loader.py b/unsloth/models/loader.py
index 0da6b83d12..54d2fa2ce6 100644
--- a/unsloth/models/loader.py
+++ b/unsloth/models/loader.py
@@ -638,7 +638,7 @@ def from_pretrained(
                 os.environ["UNSLOTH_FORCE_CUSTOM_DTYPE"] = \
                     "all;None;None;"\
                     "if ('down_projs' in name) and hasattr(module, 'weight') and "\
-                    "torch.amax(dequantize_module_weight(module)) >= 1024:"\
+                    "torch.amax(dequantize_module_weight(module)) >= 0:"\
                     "module._pre_set_compute_dtype = torch.float32\n"\
                     ""\
                     ";"

From 7d7a1156843603b2b283f77e283801feffbb0ac6 Mon Sep 17 00:00:00 2001
From: Daniel Han <danielhanchen@gmail.com>
Date: Wed, 20 Aug 2025 05:17:37 -0700
Subject: [PATCH 069/154] Update loader.py

---
 unsloth/models/loader.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/unsloth/models/loader.py b/unsloth/models/loader.py
index 54d2fa2ce6..878a7c4a4c 100644
--- a/unsloth/models/loader.py
+++ b/unsloth/models/loader.py
@@ -637,7 +637,7 @@ def from_pretrained(
                 # Set norms to float32 since anyways they get upcasted to float32
                 os.environ["UNSLOTH_FORCE_CUSTOM_DTYPE"] = \
                     "all;None;None;"\
-                    "if ('down_projs' in name) and hasattr(module, 'weight') and "\
+                    "if hasattr(module, 'weight') and "\
                     "torch.amax(dequantize_module_weight(module)) >= 0:"\
                     "module._pre_set_compute_dtype = torch.float32\n"\
                     ""\

From 031f5e12487786462fc2f0306ff6792697b2dec7 Mon Sep 17 00:00:00 2001
From: Daniel Han <danielhanchen@gmail.com>
Date: Wed, 20 Aug 2025 07:20:46 -0700
Subject: [PATCH 070/154] Update loader.py

---
 unsloth/models/loader.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/unsloth/models/loader.py b/unsloth/models/loader.py
index 878a7c4a4c..3af8200ebb 100644
--- a/unsloth/models/loader.py
+++ b/unsloth/models/loader.py
@@ -636,8 +636,8 @@ def from_pretrained(
                 # Set down projection compute dtype to be float32 for float16 machines
                 # Set norms to float32 since anyways they get upcasted to float32
                 os.environ["UNSLOTH_FORCE_CUSTOM_DTYPE"] = \
-                    "all;None;None;"\
-                    "if hasattr(module, 'weight') and "\
+                    "torch.float16;torch.bfloat16;torch.bfloat16;"\
+                    "if ('down_projs' in name) and hasattr(module, 'weight') and "\
                     "torch.amax(dequantize_module_weight(module)) >= 0:"\
                     "module._pre_set_compute_dtype = torch.float32\n"\
                     ""\

From 98bee64be03b6988613e2e3b1dbc5013bff3242b Mon Sep 17 00:00:00 2001
From: Daniel Han <danielhanchen@gmail.com>
Date: Wed, 20 Aug 2025 07:34:42 -0700
Subject: [PATCH 071/154] Update loader.py

---
 unsloth/models/loader.py | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/unsloth/models/loader.py b/unsloth/models/loader.py
index 3af8200ebb..3aed8654f8 100644
--- a/unsloth/models/loader.py
+++ b/unsloth/models/loader.py
@@ -636,11 +636,13 @@ def from_pretrained(
                 # Set down projection compute dtype to be float32 for float16 machines
                 # Set norms to float32 since anyways they get upcasted to float32
                 os.environ["UNSLOTH_FORCE_CUSTOM_DTYPE"] = \
-                    "torch.float16;torch.bfloat16;torch.bfloat16;"\
+                    "torch.float16;torch.bfloat16;torch.float16;"\
                     "if ('down_projs' in name) and hasattr(module, 'weight') and "\
                     "torch.amax(dequantize_module_weight(module)) >= 0:"\
                     "module._pre_set_compute_dtype = torch.float32\n"\
                     ""\
+                    "if ('mlp.router' in name) and hasattr(module, 'weight'):"\
+                    "module._pre_set_compute_dtype = torch.float32\n"\
                     ";"
             # Set norms to float32 since anyways they get upcasted to float32
             os.environ["UNSLOTH_HIGH_PRECISION_LAYERNORM"] = "1"

From 2ba900880d41c43e5322837d046f00425f3a249c Mon Sep 17 00:00:00 2001
From: Daniel Han <danielhanchen@gmail.com>
Date: Wed, 20 Aug 2025 17:24:53 -0700
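Subject: [PATCH 072/154] Update vision.py

Temporary debugging hook: add a module-level "global partial_model"
statement, store the loaded model in partial_model inside
from_pretrained, then abort with a bare raise right after the
HF_HUB_ENABLE_HF_TRANSFER flag is restored.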

---
 unsloth/models/vision.py | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/unsloth/models/vision.py b/unsloth/models/vision.py
index 6790c5cd12..2d3e0a2002 100644
--- a/unsloth/models/vision.py
+++ b/unsloth/models/vision.py
@@ -245,6 +245,7 @@ def unsloth_base_fast_generate(
     return output
 pass
 
+global partial_model
 
 class FastBaseModel:
 
@@ -454,6 +455,9 @@ def from_pretrained(
         raise_handler.remove()
         # Return old flag
         os.environ["HF_HUB_ENABLE_HF_TRANSFER"] = old_hf_transfer
+        global partial_model
+        partial_model = model
+        raise
 
         # Check float32 norm weights
         if os.environ.get("UNSLOTH_HIGH_PRECISION_LAYERNORM", "0") == "1":

From ea435e6d06712d59ebe00f8e23c86edacc96173a Mon Sep 17 00:00:00 2001
From: Daniel Han <danielhanchen@gmail.com>
Date: Wed, 20 Aug 2025 18:17:55 -0700
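Subject: [PATCH 073/154] Update vision.py

Move the partial_model debugging hook (global assignment plus bare
raise) from after the HF_HUB_ENABLE_HF_TRANSFER restore to after
post_patch_loss_function.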

---
 unsloth/models/vision.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/unsloth/models/vision.py b/unsloth/models/vision.py
index 2d3e0a2002..a61337b791 100644
--- a/unsloth/models/vision.py
+++ b/unsloth/models/vision.py
@@ -455,9 +455,6 @@ def from_pretrained(
         raise_handler.remove()
         # Return old flag
         os.environ["HF_HUB_ENABLE_HF_TRANSFER"] = old_hf_transfer
-        global partial_model
-        partial_model = model
-        raise
 
         # Check float32 norm weights
         if os.environ.get("UNSLOTH_HIGH_PRECISION_LAYERNORM", "0") == "1":
@@ -525,6 +522,9 @@ def from_pretrained(
         )
         model, tokenizer = patch_tokenizer(model, tokenizer)
         model = post_patch_loss_function(model)
+        global partial_model
+        partial_model = model
+        raise
 
         # Log Unsloth version for future fastpaths for inference
         if hasattr(model, "config"):

From 5bebfa9f37b933a3b000a5aa3f22448ac8fde7c0 Mon Sep 17 00:00:00 2001
From: Daniel Han <danielhanchen@gmail.com>
Date: Wed, 20 Aug 2025 19:13:42 -0700
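Subject: [PATCH 074/154] custom_datatype

loader.py: replace the empty string between the 'down_projs' and
'mlp.router' rules in UNSLOTH_FORCE_CUSTOM_DTYPE with an explicit "\n".
vision.py: print custom_datatype before the data-type edit loop
(debug output).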

---
 unsloth/models/loader.py | 2 +-
 unsloth/models/vision.py | 1 +
 2 files changed, 2 insertions(+), 1 deletion(-)

diff --git a/unsloth/models/loader.py b/unsloth/models/loader.py
index 3aed8654f8..9ab990133c 100644
--- a/unsloth/models/loader.py
+++ b/unsloth/models/loader.py
@@ -640,7 +640,7 @@ def from_pretrained(
                     "if ('down_projs' in name) and hasattr(module, 'weight') and "\
                     "torch.amax(dequantize_module_weight(module)) >= 0:"\
                     "module._pre_set_compute_dtype = torch.float32\n"\
-                    ""\
+                    "\n"\
                     "if ('mlp.router' in name) and hasattr(module, 'weight'):"\
                     "module._pre_set_compute_dtype = torch.float32\n"\
                     ";"
diff --git a/unsloth/models/vision.py b/unsloth/models/vision.py
index a61337b791..c57fd80ef5 100644
--- a/unsloth/models/vision.py
+++ b/unsloth/models/vision.py
@@ -463,6 +463,7 @@ def from_pretrained(
                     module._pre_set_compute_dtype = torch.float32
         pass
         # Edit data-types
+        print("custom_datatype", custom_datatype)
         if custom_datatype is not None:
             with torch.no_grad():
                 for jj, (name, module) in enumerate(model.named_modules()):

From 356789a65805931f09ffca007227d203f19d1ebc Mon Sep 17 00:00:00 2001
From: Daniel Han <danielhanchen@gmail.com>
Date: Wed, 20 Aug 2025 19:18:03 -0700
Subject: [PATCH 075/154] recheck

---
 unsloth/models/loader.py | 1 +
 unsloth/models/vision.py | 2 +-
 2 files changed, 2 insertions(+), 1 deletion(-)

diff --git a/unsloth/models/loader.py b/unsloth/models/loader.py
index 9ab990133c..3de0943917 100644
--- a/unsloth/models/loader.py
+++ b/unsloth/models/loader.py
@@ -646,6 +646,7 @@ def from_pretrained(
                     ";"
             # Set norms to float32 since anyways they get upcasted to float32
             os.environ["UNSLOTH_HIGH_PRECISION_LAYERNORM"] = "1"
+            print(os.environ["UNSLOTH_FORCE_CUSTOM_DTYPE"])
         else:
             for check_model_name in DISABLE_COMPILE_MODEL_NAMES:
                 if check_model_name in lowered_model_name:
diff --git a/unsloth/models/vision.py b/unsloth/models/vision.py
index c57fd80ef5..419d760f7a 100644
--- a/unsloth/models/vision.py
+++ b/unsloth/models/vision.py
@@ -359,7 +359,7 @@ def from_pretrained(
             custom_datatype = os.environ["UNSLOTH_FORCE_CUSTOM_DTYPE"]
             assert custom_datatype.count(";") >= 4
             checker, _dtype, _bnb_compute_dtype, _custom_datatype, execute_code = custom_datatype.split(";", 4)
-
+            print(checker, _dtype, _bnb_compute_dtype, _custom_datatype, execute_code)
             # Allow custom dtypes on all runs
             allow_all_runs = (checker == "all")
             # Allow only on float16 datatypes

From d0f97a9a0f295fbe08f3c6b4401b34bcea125ac1 Mon Sep 17 00:00:00 2001
From: Daniel Han <danielhanchen@gmail.com>
Date: Wed, 20 Aug 2025 19:21:21 -0700
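Subject: [PATCH 076/154] Float16

loader.py: spell the checker field of two UNSLOTH_FORCE_CUSTOM_DTYPE
strings as "torch.float16" instead of "float16", and drop the debug
print of the environment variable.
vision.py: accept both "float16" and "torch.float16" as the checker
value when deciding allow_float16_runs.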

---
 unsloth/models/loader.py | 5 ++---
 unsloth/models/vision.py | 5 ++++-
 2 files changed, 6 insertions(+), 4 deletions(-)

diff --git a/unsloth/models/loader.py b/unsloth/models/loader.py
index 3de0943917..a7d3da17bd 100644
--- a/unsloth/models/loader.py
+++ b/unsloth/models/loader.py
@@ -601,7 +601,7 @@ def from_pretrained(
                 raise RuntimeError("Unsloth: Gemma 3N only works on transformers >= 4.53.0" + LATEST)
             os.environ["UNSLOTH_DISABLE_STATIC_GENERATION"] = "1"
             os.environ["UNSLOTH_FORCE_CUSTOM_DTYPE"] = \
-                "float16;torch.float16;torch.float16;"\
+                "torch.float16;torch.float16;torch.float16;"\
                 "if name.endswith('norm'): "\
                 "module._pre_set_compute_dtype = torch.float32\n"\
                 ";"\
@@ -612,7 +612,7 @@ def from_pretrained(
             # Falcon must use float32 Triton ie TRITON_F32_DEFAULT = 'ieee'
             # since Mamba kernels error out on using lower precision
             os.environ["UNSLOTH_FORCE_CUSTOM_DTYPE"] = \
-                "float16;torch.float32;torch.float16;"\
+                "torch.float16;torch.float32;torch.float16;"\
                 "if name.endswith(('q_proj', 'k_proj', 'v_proj', 'o_proj', 'gate_proj', 'up_proj', 'down_proj', 'head')): module.to(torch.float16)"\
                 ";"\
                 "os.environ['TRITON_F32_DEFAULT'] = 'ieee'"
@@ -646,7 +646,6 @@ def from_pretrained(
                     ";"
             # Set norms to float32 since anyways they get upcasted to float32
             os.environ["UNSLOTH_HIGH_PRECISION_LAYERNORM"] = "1"
-            print(os.environ["UNSLOTH_FORCE_CUSTOM_DTYPE"])
         else:
             for check_model_name in DISABLE_COMPILE_MODEL_NAMES:
                 if check_model_name in lowered_model_name:
diff --git a/unsloth/models/vision.py b/unsloth/models/vision.py
index 419d760f7a..12ec00c3bd 100644
--- a/unsloth/models/vision.py
+++ b/unsloth/models/vision.py
@@ -363,7 +363,10 @@ def from_pretrained(
             # Allow custom dtypes on all runs
             allow_all_runs = (checker == "all")
             # Allow only on float16 datatypes
-            allow_float16_runs = (checker == "float16" and dtype == torch.float16)
+            allow_float16_runs = (
+                (checker == "float16" or checker == "torch.float16") and \
+                (dtype == torch.float16)
+            )
 
             if allow_all_runs or allow_float16_runs:
                 if eval(_dtype) is not None:

From d83767f321203359cd31a096b502b6d81181fe77 Mon Sep 17 00:00:00 2001
From: Daniel Han <danielhanchen@gmail.com>
Date: Wed, 20 Aug 2025 19:24:26 -0700
Subject: [PATCH 077/154] Update vision.py

---
 unsloth/models/vision.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/unsloth/models/vision.py b/unsloth/models/vision.py
index 12ec00c3bd..705647cb28 100644
--- a/unsloth/models/vision.py
+++ b/unsloth/models/vision.py
@@ -359,7 +359,6 @@ def from_pretrained(
             custom_datatype = os.environ["UNSLOTH_FORCE_CUSTOM_DTYPE"]
             assert custom_datatype.count(";") >= 4
             checker, _dtype, _bnb_compute_dtype, _custom_datatype, execute_code = custom_datatype.split(";", 4)
-            print(checker, _dtype, _bnb_compute_dtype, _custom_datatype, execute_code)
             # Allow custom dtypes on all runs
             allow_all_runs = (checker == "all")
             # Allow only on float16 datatypes
@@ -367,6 +366,7 @@ def from_pretrained(
                 (checker == "float16" or checker == "torch.float16") and \
                 (dtype == torch.float16)
             )
+            print([checker], [_dtype], [_bnb_compute_dtype], [_custom_datatype], [execute_code] )
 
             if allow_all_runs or allow_float16_runs:
                 if eval(_dtype) is not None:
@@ -387,7 +387,7 @@ def from_pretrained(
         if not ("attn_implementation" in kwargs):
             kwargs["attn_implementation"] = "sdpa"
         if not supports_sdpa:
-            print(f"Unsloth: {model_type_arch.title()} does not support SDPA - switching to eager!")
+            print(f"Unsloth: {model_type_arch.title()} does not support SDPA - switching to fast eager.")
             del kwargs["attn_implementation"]
         pass
 

From 5b575d87ef24302cb434743868836bcd95acc2f2 Mon Sep 17 00:00:00 2001
From: Daniel Han <danielhanchen@gmail.com>
Date: Wed, 20 Aug 2025 19:27:58 -0700
Subject: [PATCH 078/154] Update vision.py

---
 unsloth/models/vision.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/unsloth/models/vision.py b/unsloth/models/vision.py
index 705647cb28..44f62d850d 100644
--- a/unsloth/models/vision.py
+++ b/unsloth/models/vision.py
@@ -366,7 +366,7 @@ def from_pretrained(
                 (checker == "float16" or checker == "torch.float16") and \
                 (dtype == torch.float16)
             )
-            print([checker], [_dtype], [_bnb_compute_dtype], [_custom_datatype], [execute_code] )
+            print([allow_float16_runs], [checker], [_dtype], [_bnb_compute_dtype], [_custom_datatype], [execute_code] )
 
             if allow_all_runs or allow_float16_runs:
                 if eval(_dtype) is not None:

From 66eee4deea47e76281497aeabc0be1a215ab9f39 Mon Sep 17 00:00:00 2001
From: Daniel Han <danielhanchen@gmail.com>
Date: Wed, 20 Aug 2025 19:29:05 -0700
Subject: [PATCH 079/154] Update vision.py

---
 unsloth/models/vision.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/unsloth/models/vision.py b/unsloth/models/vision.py
index 44f62d850d..3ce03e6da7 100644
--- a/unsloth/models/vision.py
+++ b/unsloth/models/vision.py
@@ -366,7 +366,7 @@ def from_pretrained(
                 (checker == "float16" or checker == "torch.float16") and \
                 (dtype == torch.float16)
             )
-            print([allow_float16_runs], [checker], [_dtype], [_bnb_compute_dtype], [_custom_datatype], [execute_code] )
+            print([(checker == "float16" or checker == "torch.float16")], [dtype], [allow_float16_runs], [checker], [_dtype], [_bnb_compute_dtype], [_custom_datatype], [execute_code] )
 
             if allow_all_runs or allow_float16_runs:
                 if eval(_dtype) is not None:

From 27d044e47840785f40a195aa7ee77dcab1149046 Mon Sep 17 00:00:00 2001
From: Daniel Han <danielhanchen@gmail.com>
Date: Wed, 20 Aug 2025 20:38:44 -0700
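Subject: [PATCH 080/154] Update vision.py

Also enable allow_float16_runs when UNSLOTH_FORCE_FLOAT32 is "1", not
only when dtype is torch.float16, and remove the debug print of the
parsed UNSLOTH_FORCE_CUSTOM_DTYPE fields.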

---
 unsloth/models/vision.py | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/unsloth/models/vision.py b/unsloth/models/vision.py
index 3ce03e6da7..e125824c63 100644
--- a/unsloth/models/vision.py
+++ b/unsloth/models/vision.py
@@ -364,10 +364,8 @@ def from_pretrained(
             # Allow only on float16 datatypes
             allow_float16_runs = (
                 (checker == "float16" or checker == "torch.float16") and \
-                (dtype == torch.float16)
+                (dtype == torch.float16 or os.environ.get("UNSLOTH_FORCE_FLOAT32", "0") == "1")
             )
-            print([(checker == "float16" or checker == "torch.float16")], [dtype], [allow_float16_runs], [checker], [_dtype], [_bnb_compute_dtype], [_custom_datatype], [execute_code] )
-
             if allow_all_runs or allow_float16_runs:
                 if eval(_dtype) is not None:
                     dtype = eval(_dtype)

From 34d07d89463c21cbb33275ccffaf044e3d7df243 Mon Sep 17 00:00:00 2001
From: Daniel Han <danielhanchen@gmail.com>
Date: Wed, 20 Aug 2025 20:42:24 -0700
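Subject: [PATCH 081/154] Update vision.py

Remove leftover debugging code: the custom_datatype print and the
partial_model hook with its bare raise after post_patch_loss_function.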

---
 unsloth/models/vision.py | 4 ----
 1 file changed, 4 deletions(-)

diff --git a/unsloth/models/vision.py b/unsloth/models/vision.py
index e125824c63..23e2bb088a 100644
--- a/unsloth/models/vision.py
+++ b/unsloth/models/vision.py
@@ -464,7 +464,6 @@ def from_pretrained(
                     module._pre_set_compute_dtype = torch.float32
         pass
         # Edit data-types
-        print("custom_datatype", custom_datatype)
         if custom_datatype is not None:
             with torch.no_grad():
                 for jj, (name, module) in enumerate(model.named_modules()):
@@ -524,9 +523,6 @@ def from_pretrained(
         )
         model, tokenizer = patch_tokenizer(model, tokenizer)
         model = post_patch_loss_function(model)
-        global partial_model
-        partial_model = model
-        raise
 
         # Log Unsloth version for future fastpaths for inference
         if hasattr(model, "config"):

From 3ad756145f638cfaa2f15a21f24d4b97d58d4ad1 Mon Sep 17 00:00:00 2001
From: Daniel Han <danielhanchen@gmail.com>
Date: Wed, 20 Aug 2025 21:29:44 -0700
Subject: [PATCH 082/154] Update loader.py

---
 unsloth/models/loader.py | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/unsloth/models/loader.py b/unsloth/models/loader.py
index a7d3da17bd..0156e2f059 100644
--- a/unsloth/models/loader.py
+++ b/unsloth/models/loader.py
@@ -638,11 +638,9 @@ def from_pretrained(
                 os.environ["UNSLOTH_FORCE_CUSTOM_DTYPE"] = \
                     "torch.float16;torch.bfloat16;torch.float16;"\
                     "if ('down_projs' in name) and hasattr(module, 'weight') and "\
-                    "torch.amax(dequantize_module_weight(module)) >= 0:"\
+                    "torch.amax(dequantize_module_weight(module)) >= 1024:"\
                     "module._pre_set_compute_dtype = torch.float32\n"\
                     "\n"\
-                    "if ('mlp.router' in name) and hasattr(module, 'weight'):"\
-                    "module._pre_set_compute_dtype = torch.float32\n"\
                     ";"
             # Set norms to float32 since anyways they get upcasted to float32
             os.environ["UNSLOTH_HIGH_PRECISION_LAYERNORM"] = "1"

From b75729795a21149ff23f513469f603f21ddf7a0b Mon Sep 17 00:00:00 2001
From: Daniel Han <danielhanchen@gmail.com>
Date: Wed, 20 Aug 2025 21:31:05 -0700
Subject: [PATCH 083/154] Update loader.py

---
 unsloth/models/loader.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/unsloth/models/loader.py b/unsloth/models/loader.py
index 0156e2f059..14baa60d66 100644
--- a/unsloth/models/loader.py
+++ b/unsloth/models/loader.py
@@ -638,7 +638,7 @@ def from_pretrained(
                 os.environ["UNSLOTH_FORCE_CUSTOM_DTYPE"] = \
                     "torch.float16;torch.bfloat16;torch.float16;"\
                     "if ('down_projs' in name) and hasattr(module, 'weight') and "\
-                    "torch.amax(dequantize_module_weight(module)) >= 1024:"\
+                    "torch.amax(dequantize_module_weight(module)) >= 102400:"\
                     "module._pre_set_compute_dtype = torch.float32\n"\
                     "\n"\
                     ";"

From ceeca866ae8cb9774a830d3fba84c9238c281d77 Mon Sep 17 00:00:00 2001
From: Daniel Han <danielhanchen@gmail.com>
Date: Wed, 20 Aug 2025 21:44:30 -0700
Subject: [PATCH 084/154] Update loader.py

---
 unsloth/models/loader.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/unsloth/models/loader.py b/unsloth/models/loader.py
index 14baa60d66..4e0365ce1e 100644
--- a/unsloth/models/loader.py
+++ b/unsloth/models/loader.py
@@ -638,7 +638,7 @@ def from_pretrained(
                 os.environ["UNSLOTH_FORCE_CUSTOM_DTYPE"] = \
                     "torch.float16;torch.bfloat16;torch.float16;"\
                     "if ('down_projs' in name) and hasattr(module, 'weight') and "\
-                    "torch.amax(dequantize_module_weight(module)) >= 102400:"\
+                    "torch.amax(dequantize_module_weight(module)) >= 512:"\
                     "module._pre_set_compute_dtype = torch.float32\n"\
                     "\n"\
                     ";"

From 87758b98edf6cc2aa8addbd19cfba4678fa3cc2c Mon Sep 17 00:00:00 2001
From: Daniel Han <danielhanchen@gmail.com>
Date: Wed, 20 Aug 2025 21:51:36 -0700
Subject: [PATCH 085/154] Update loader.py

---
 unsloth/models/loader.py | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/unsloth/models/loader.py b/unsloth/models/loader.py
index 4e0365ce1e..85696859ae 100644
--- a/unsloth/models/loader.py
+++ b/unsloth/models/loader.py
@@ -638,9 +638,11 @@ def from_pretrained(
                 os.environ["UNSLOTH_FORCE_CUSTOM_DTYPE"] = \
                     "torch.float16;torch.bfloat16;torch.float16;"\
                     "if ('down_projs' in name) and hasattr(module, 'weight') and "\
-                    "torch.amax(dequantize_module_weight(module)) >= 512:"\
+                    "torch.amax(dequantize_module_weight(module)) >= 256:"\
                     "module._pre_set_compute_dtype = torch.float32\n"\
                     "\n"\
+                    "if ('mlp.router' in name) and hasattr(module, 'weight'):"\
+                    "module._pre_set_compute_dtype = torch.float32\n"\
                     ";"
             # Set norms to float32 since anyways they get upcasted to float32
             os.environ["UNSLOTH_HIGH_PRECISION_LAYERNORM"] = "1"

From 97d34d48536b35c0d2fd7d60995c099aea8a6d83 Mon Sep 17 00:00:00 2001
From: Daniel Han <danielhanchen@gmail.com>
Date: Thu, 21 Aug 2025 00:23:27 -0700
Subject: [PATCH 086/154] Update loader.py

---
 unsloth/models/loader.py | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/unsloth/models/loader.py b/unsloth/models/loader.py
index 85696859ae..4e0365ce1e 100644
--- a/unsloth/models/loader.py
+++ b/unsloth/models/loader.py
@@ -638,11 +638,9 @@ def from_pretrained(
                 os.environ["UNSLOTH_FORCE_CUSTOM_DTYPE"] = \
                     "torch.float16;torch.bfloat16;torch.float16;"\
                     "if ('down_projs' in name) and hasattr(module, 'weight') and "\
-                    "torch.amax(dequantize_module_weight(module)) >= 256:"\
+                    "torch.amax(dequantize_module_weight(module)) >= 512:"\
                     "module._pre_set_compute_dtype = torch.float32\n"\
                     "\n"\
-                    "if ('mlp.router' in name) and hasattr(module, 'weight'):"\
-                    "module._pre_set_compute_dtype = torch.float32\n"\
                     ";"
             # Set norms to float32 since anyways they get upcasted to float32
             os.environ["UNSLOTH_HIGH_PRECISION_LAYERNORM"] = "1"

From 43bf41f9df86e3bb2bf40e4db8957e0418fbc5e6 Mon Sep 17 00:00:00 2001
From: Daniel Han <danielhanchen@gmail.com>
Date: Thu, 21 Aug 2025 00:24:39 -0700
Subject: [PATCH 087/154] Update loader.py

---
 unsloth/models/loader.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/unsloth/models/loader.py b/unsloth/models/loader.py
index 4e0365ce1e..94a07bf06a 100644
--- a/unsloth/models/loader.py
+++ b/unsloth/models/loader.py
@@ -638,7 +638,7 @@ def from_pretrained(
                 os.environ["UNSLOTH_FORCE_CUSTOM_DTYPE"] = \
                     "torch.float16;torch.bfloat16;torch.float16;"\
                     "if ('down_projs' in name) and hasattr(module, 'weight') and "\
-                    "torch.amax(dequantize_module_weight(module)) >= 512:"\
+                    "torch.amax(dequantize_module_weight(module)) >= 256:"\
                     "module._pre_set_compute_dtype = torch.float32\n"\
                     "\n"\
                     ";"

From 6e7ad5259d13c959cb08ee81a97547425144d639 Mon Sep 17 00:00:00 2001
From: Daniel Han <danielhanchen@gmail.com>
Date: Thu, 21 Aug 2025 00:26:49 -0700
Subject: [PATCH 088/154] Update loader.py

---
 unsloth/models/loader.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/unsloth/models/loader.py b/unsloth/models/loader.py
index 94a07bf06a..c9c1e05553 100644
--- a/unsloth/models/loader.py
+++ b/unsloth/models/loader.py
@@ -638,7 +638,7 @@ def from_pretrained(
                 os.environ["UNSLOTH_FORCE_CUSTOM_DTYPE"] = \
                     "torch.float16;torch.bfloat16;torch.float16;"\
                     "if ('down_projs' in name) and hasattr(module, 'weight') and "\
-                    "torch.amax(dequantize_module_weight(module)) >= 256:"\
+                    "torch.amax(dequantize_module_weight(module)) >= 128:"\
                     "module._pre_set_compute_dtype = torch.float32\n"\
                     "\n"\
                     ";"

From d605aa7311bffa8e80ae6ec3e6f34716d209e140 Mon Sep 17 00:00:00 2001
From: Daniel Han <danielhanchen@gmail.com>
Date: Thu, 21 Aug 2025 00:35:38 -0700
Subject: [PATCH 089/154] Update loader.py

---
 unsloth/models/loader.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/unsloth/models/loader.py b/unsloth/models/loader.py
index c9c1e05553..6ec045eb36 100644
--- a/unsloth/models/loader.py
+++ b/unsloth/models/loader.py
@@ -638,7 +638,7 @@ def from_pretrained(
                 os.environ["UNSLOTH_FORCE_CUSTOM_DTYPE"] = \
                     "torch.float16;torch.bfloat16;torch.float16;"\
                     "if ('down_projs' in name) and hasattr(module, 'weight') and "\
-                    "torch.amax(dequantize_module_weight(module)) >= 128:"\
+                    "torch.amax(dequantize_module_weight(module)) >= 0:"\
                     "module._pre_set_compute_dtype = torch.float32\n"\
                     "\n"\
                     ";"

From f417dc882969acfd9e11a4a3d0ed7b548371aa2e Mon Sep 17 00:00:00 2001
From: Daniel Han <danielhanchen@gmail.com>
Date: Thu, 21 Aug 2025 00:51:06 -0700
Subject: [PATCH 090/154] Update loader.py

---
 unsloth/models/loader.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/unsloth/models/loader.py b/unsloth/models/loader.py
index 6ec045eb36..a7d3da17bd 100644
--- a/unsloth/models/loader.py
+++ b/unsloth/models/loader.py
@@ -641,6 +641,8 @@ def from_pretrained(
                     "torch.amax(dequantize_module_weight(module)) >= 0:"\
                     "module._pre_set_compute_dtype = torch.float32\n"\
                     "\n"\
+                    "if ('mlp.router' in name) and hasattr(module, 'weight'):"\
+                    "module._pre_set_compute_dtype = torch.float32\n"\
                     ";"
             # Set norms to float32 since anyways they get upcasted to float32
             os.environ["UNSLOTH_HIGH_PRECISION_LAYERNORM"] = "1"

From 05fe3d1fd7d6f202a4f8b50262d5d00127eb72e2 Mon Sep 17 00:00:00 2001
From: Daniel Han <danielhanchen@gmail.com>
Date: Thu, 21 Aug 2025 01:21:10 -0700
Subject: [PATCH 091/154] Update loader.py

---
 unsloth/models/loader.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/unsloth/models/loader.py b/unsloth/models/loader.py
index a7d3da17bd..28bb896760 100644
--- a/unsloth/models/loader.py
+++ b/unsloth/models/loader.py
@@ -637,7 +637,7 @@ def from_pretrained(
                 # Set norms to float32 since anyways they get upcasted to float32
                 os.environ["UNSLOTH_FORCE_CUSTOM_DTYPE"] = \
                     "torch.float16;torch.bfloat16;torch.float16;"\
-                    "if ('down_projs' in name) and hasattr(module, 'weight') and "\
+                    "if ('down_projs' in name or 'gate_up_proj' in name) and hasattr(module, 'weight') and "\
                     "torch.amax(dequantize_module_weight(module)) >= 0:"\
                     "module._pre_set_compute_dtype = torch.float32\n"\
                     "\n"\

From a79d6f6ac880e17b6079b1ba7981b130615a19dc Mon Sep 17 00:00:00 2001
From: Daniel Han <danielhanchen@gmail.com>
Date: Thu, 21 Aug 2025 01:54:48 -0700
Subject: [PATCH 092/154] Update loader.py

---
 unsloth/models/loader.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/unsloth/models/loader.py b/unsloth/models/loader.py
index 28bb896760..a7d3da17bd 100644
--- a/unsloth/models/loader.py
+++ b/unsloth/models/loader.py
@@ -637,7 +637,7 @@ def from_pretrained(
                 # Set norms to float32 since anyways they get upcasted to float32
                 os.environ["UNSLOTH_FORCE_CUSTOM_DTYPE"] = \
                     "torch.float16;torch.bfloat16;torch.float16;"\
-                    "if ('down_projs' in name or 'gate_up_proj' in name) and hasattr(module, 'weight') and "\
+                    "if ('down_projs' in name) and hasattr(module, 'weight') and "\
                     "torch.amax(dequantize_module_weight(module)) >= 0:"\
                     "module._pre_set_compute_dtype = torch.float32\n"\
                     "\n"\

From 59702c494078128468015ccd003761e83ca2451a Mon Sep 17 00:00:00 2001
From: Daniel Han <danielhanchen@gmail.com>
Date: Thu, 21 Aug 2025 02:09:23 -0700
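Subject: [PATCH 093/154] Update loader.py

Add a rule that sets _pre_set_compute_dtype to torch.float32 on the
sinks attribute of modules whose name contains 'self_attn'.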

---
 unsloth/models/loader.py | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/unsloth/models/loader.py b/unsloth/models/loader.py
index a7d3da17bd..b95678a499 100644
--- a/unsloth/models/loader.py
+++ b/unsloth/models/loader.py
@@ -643,6 +643,10 @@ def from_pretrained(
                     "\n"\
                     "if ('mlp.router' in name) and hasattr(module, 'weight'):"\
                     "module._pre_set_compute_dtype = torch.float32\n"\
+                    "\n"\
+                    "if ('self_attn' in name) and hasattr(module, 'sinks'):"\
+                    "module.sinks._pre_set_compute_dtype = torch.float32\n"\
+                    "\n"\
                     ";"
             # Set norms to float32 since anyways they get upcasted to float32
             os.environ["UNSLOTH_HIGH_PRECISION_LAYERNORM"] = "1"

From 1b66aee7b2f395ba51e1a2e69219f2c08701a95c Mon Sep 17 00:00:00 2001
From: Daniel Han <danielhanchen@gmail.com>
Date: Thu, 21 Aug 2025 02:32:41 -0700
Subject: [PATCH 094/154] Update loader.py

---
 unsloth/models/loader.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/unsloth/models/loader.py b/unsloth/models/loader.py
index b95678a499..ef39e636c2 100644
--- a/unsloth/models/loader.py
+++ b/unsloth/models/loader.py
@@ -637,7 +637,7 @@ def from_pretrained(
                 # Set norms to float32 since anyways they get upcasted to float32
                 os.environ["UNSLOTH_FORCE_CUSTOM_DTYPE"] = \
                     "torch.float16;torch.bfloat16;torch.float16;"\
-                    "if ('down_projs' in name) and hasattr(module, 'weight') and "\
+                    "if ('down_projs' in name or '_proj' in name) and hasattr(module, 'weight') and "\
                     "torch.amax(dequantize_module_weight(module)) >= 0:"\
                     "module._pre_set_compute_dtype = torch.float32\n"\
                     "\n"\

From a71fa05c7a7a8e72547a7c054e659ce1149e088e Mon Sep 17 00:00:00 2001
From: Daniel Han <danielhanchen@gmail.com>
Date: Thu, 21 Aug 2025 02:51:45 -0700
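Subject: [PATCH 095/154] Update loader.py

Drop the '_proj' name match from the first rule and add a rule for
modules whose name contains 'embed_tokens'.

Note: the first rule's condition is now the bare string ('down_projs')
rather than ('down_projs' in name); a non-empty string is always truthy,
so the rule applies to every module that has a weight. The new
'embed_tokens' rule assigns to module.sinks._pre_set_compute_dtype
without checking that the module has a sinks attribute.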

---
 unsloth/models/loader.py | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/unsloth/models/loader.py b/unsloth/models/loader.py
index ef39e636c2..dd0a3961e7 100644
--- a/unsloth/models/loader.py
+++ b/unsloth/models/loader.py
@@ -637,7 +637,7 @@ def from_pretrained(
                 # Set norms to float32 since anyways they get upcasted to float32
                 os.environ["UNSLOTH_FORCE_CUSTOM_DTYPE"] = \
                     "torch.float16;torch.bfloat16;torch.float16;"\
-                    "if ('down_projs' in name or '_proj' in name) and hasattr(module, 'weight') and "\
+                    "if ('down_projs') and hasattr(module, 'weight') and "\
                     "torch.amax(dequantize_module_weight(module)) >= 0:"\
                     "module._pre_set_compute_dtype = torch.float32\n"\
                     "\n"\
@@ -647,6 +647,9 @@ def from_pretrained(
                     "if ('self_attn' in name) and hasattr(module, 'sinks'):"\
                     "module.sinks._pre_set_compute_dtype = torch.float32\n"\
                     "\n"\
+                    "if ('embed_tokens' in name):"\
+                    "module.sinks._pre_set_compute_dtype = torch.float32\n"\
+                    "\n"\
                     ";"
             # Set norms to float32 since anyways they get upcasted to float32
             os.environ["UNSLOTH_HIGH_PRECISION_LAYERNORM"] = "1"

From d3e8625b1de6703165535f985d54ebf621eec1ae Mon Sep 17 00:00:00 2001
From: Daniel Han <danielhanchen@gmail.com>
Date: Thu, 21 Aug 2025 02:53:29 -0700
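Subject: [PATCH 096/154] Update loader.py

Guard the 'embed_tokens' rule with hasattr(module, 'weight') and set
_pre_set_compute_dtype on the module itself instead of on module.sinks.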

---
 unsloth/models/loader.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/unsloth/models/loader.py b/unsloth/models/loader.py
index dd0a3961e7..1c64ae4cfc 100644
--- a/unsloth/models/loader.py
+++ b/unsloth/models/loader.py
@@ -647,8 +647,8 @@ def from_pretrained(
                     "if ('self_attn' in name) and hasattr(module, 'sinks'):"\
                     "module.sinks._pre_set_compute_dtype = torch.float32\n"\
                     "\n"\
-                    "if ('embed_tokens' in name):"\
-                    "module.sinks._pre_set_compute_dtype = torch.float32\n"\
+                    "if ('embed_tokens' in name) and hasattr(module, 'weight'):"\
+                    "module._pre_set_compute_dtype = torch.float32\n"\
                     "\n"\
                     ";"
             # Set norms to float32 since anyways they get upcasted to float32

From fb112cf3c6b48df1afcf51827f775ce1fee951eb Mon Sep 17 00:00:00 2001
From: Daniel Han <danielhanchen@gmail.com>
Date: Thu, 21 Aug 2025 03:09:03 -0700
Subject: [PATCH 097/154] Update loader.py

Extend the 'embed_tokens' rule to modules whose name contains 'lm_head'.

---
 unsloth/models/loader.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/unsloth/models/loader.py b/unsloth/models/loader.py
index 1c64ae4cfc..e8c410ebd1 100644
--- a/unsloth/models/loader.py
+++ b/unsloth/models/loader.py
@@ -647,7 +647,7 @@ def from_pretrained(
                     "if ('self_attn' in name) and hasattr(module, 'sinks'):"\
                     "module.sinks._pre_set_compute_dtype = torch.float32\n"\
                     "\n"\
-                    "if ('embed_tokens' in name) and hasattr(module, 'weight'):"\
+                    "if ('embed_tokens' in name or 'lm_head' in name) and hasattr(module, 'weight'):"\
                     "module._pre_set_compute_dtype = torch.float32\n"\
                     "\n"\
                     ";"

From 5dbdcc565dd6dc8fa5edc2bf4314ad326ffef18c Mon Sep 17 00:00:00 2001
From: Daniel Han <danielhanchen@gmail.com>
Date: Thu, 21 Aug 2025 03:29:27 -0700
Subject: [PATCH 098/154] loader.py: use bfloat16 for embed_tokens/lm_head compute dtype

---
 unsloth/models/loader.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/unsloth/models/loader.py b/unsloth/models/loader.py
index e8c410ebd1..c9e0646af7 100644
--- a/unsloth/models/loader.py
+++ b/unsloth/models/loader.py
@@ -648,7 +648,7 @@ def from_pretrained(
                     "module.sinks._pre_set_compute_dtype = torch.float32\n"\
                     "\n"\
                     "if ('embed_tokens' in name or 'lm_head' in name) and hasattr(module, 'weight'):"\
-                    "module._pre_set_compute_dtype = torch.float32\n"\
+                    "module._pre_set_compute_dtype = torch.bfloat16\n"\
                     "\n"\
                     ";"
             # Set norms to float32 since anyways they get upcasted to float32

From fdaa0074093bfffd626632bf8153d52eb7c30a4e Mon Sep 17 00:00:00 2001
From: Daniel Han <danielhanchen@gmail.com>
Date: Thu, 21 Aug 2025 04:02:33 -0700
Subject: [PATCH 099/154] loader.py: use bfloat16 for down_projs/router/sinks compute dtype

---
 unsloth/models/loader.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/unsloth/models/loader.py b/unsloth/models/loader.py
index c9e0646af7..71459599a5 100644
--- a/unsloth/models/loader.py
+++ b/unsloth/models/loader.py
@@ -639,13 +639,13 @@ def from_pretrained(
                     "torch.float16;torch.bfloat16;torch.float16;"\
                     "if ('down_projs') and hasattr(module, 'weight') and "\
                     "torch.amax(dequantize_module_weight(module)) >= 0:"\
-                    "module._pre_set_compute_dtype = torch.float32\n"\
+                    "module._pre_set_compute_dtype = torch.bfloat16\n"\
                     "\n"\
                     "if ('mlp.router' in name) and hasattr(module, 'weight'):"\
-                    "module._pre_set_compute_dtype = torch.float32\n"\
+                    "module._pre_set_compute_dtype = torch.bfloat16\n"\
                     "\n"\
                     "if ('self_attn' in name) and hasattr(module, 'sinks'):"\
-                    "module.sinks._pre_set_compute_dtype = torch.float32\n"\
+                    "module.sinks._pre_set_compute_dtype = torch.bfloat16\n"\
                     "\n"\
                     "if ('embed_tokens' in name or 'lm_head' in name) and hasattr(module, 'weight'):"\
                     "module._pre_set_compute_dtype = torch.bfloat16\n"\

From ba0eb04d9076811da446e8a7d46717ac91fd2ada Mon Sep 17 00:00:00 2001
From: Daniel Han <danielhanchen@gmail.com>
Date: Thu, 21 Aug 2025 04:19:00 -0700
Subject: [PATCH 100/154] Fix always-true proj name check; gate static cache on fullgraph

---
 unsloth/models/loader.py | 2 +-
 unsloth/models/vision.py | 3 ++-
 2 files changed, 3 insertions(+), 2 deletions(-)

diff --git a/unsloth/models/loader.py b/unsloth/models/loader.py
index 71459599a5..7b8320c65a 100644
--- a/unsloth/models/loader.py
+++ b/unsloth/models/loader.py
@@ -637,7 +637,7 @@ def from_pretrained(
                 # Set norms to float32 since anyways they get upcasted to float32
                 os.environ["UNSLOTH_FORCE_CUSTOM_DTYPE"] = \
                     "torch.float16;torch.bfloat16;torch.float16;"\
-                    "if ('down_projs') and hasattr(module, 'weight') and "\
+                    "if ('_proj' in name) and hasattr(module, 'weight') and "\
                     "torch.amax(dequantize_module_weight(module)) >= 0:"\
                     "module._pre_set_compute_dtype = torch.bfloat16\n"\
                     "\n"\
diff --git a/unsloth/models/vision.py b/unsloth/models/vision.py
index 23e2bb088a..486a049339 100644
--- a/unsloth/models/vision.py
+++ b/unsloth/models/vision.py
@@ -213,7 +213,8 @@ def unsloth_base_fast_generate(
         cache_implementation = None
     if cache_implementation is not None:
         swa = getattr(getattr(self.config, "text_config", self.config), "sliding_window", None)
-        if swa == 0 or type(swa) is not int:
+        if (swa == 0 or type(swa) is not int) \
+            and (getattr(self, "_can_compile_fullgraph", True) is True):
             cache_implementation = "static"
         else:
             cache_implementation = "hybrid"

From 3f982620a575c0117aafc572c4767d77ced7304b Mon Sep 17 00:00:00 2001
From: Daniel Han <danielhanchen@gmail.com>
Date: Thu, 21 Aug 2025 05:47:58 -0700
Subject: [PATCH 101/154] loader.py: change third forced-dtype field to torch.bfloat16

---
 unsloth/models/loader.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/unsloth/models/loader.py b/unsloth/models/loader.py
index 7b8320c65a..f6bb23551d 100644
--- a/unsloth/models/loader.py
+++ b/unsloth/models/loader.py
@@ -636,7 +636,7 @@ def from_pretrained(
                 # Set down projection compute dtype to be float32 for float16 machines
                 # Set norms to float32 since anyways they get upcasted to float32
                 os.environ["UNSLOTH_FORCE_CUSTOM_DTYPE"] = \
-                    "torch.float16;torch.bfloat16;torch.float16;"\
+                    "torch.float16;torch.bfloat16;torch.bfloat16;"\
                     "if ('_proj' in name) and hasattr(module, 'weight') and "\
                     "torch.amax(dequantize_module_weight(module)) >= 0:"\
                     "module._pre_set_compute_dtype = torch.bfloat16\n"\

From 3e6511b84f297289bf694893b023db35fd24fc49 Mon Sep 17 00:00:00 2001
From: Daniel Han <danielhanchen@gmail.com>
Date: Thu, 21 Aug 2025 06:37:40 -0700
Subject: [PATCH 102/154] loader.py: do not overwrite a pre-set UNSLOTH_FORCE_FLOAT32

---
 unsloth/models/loader.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/unsloth/models/loader.py b/unsloth/models/loader.py
index f6bb23551d..889d170a17 100644
--- a/unsloth/models/loader.py
+++ b/unsloth/models/loader.py
@@ -785,7 +785,8 @@ def from_pretrained(
         model_types = ["siglip"] + model_types
 
         # Set forced float32 env flag
-        os.environ["UNSLOTH_FORCE_FLOAT32"] = "0"
+        if "UNSLOTH_FORCE_FLOAT32" not in os.environ:
+            os.environ["UNSLOTH_FORCE_FLOAT32"] = "0"
         do_forced_float32 = False
         for model_type_arch in model_types:
             if model_type_arch != "siglip": break

From c9e75375b31d14c66e9f8846e2793f96e9bfee71 Mon Sep 17 00:00:00 2001
From: Daniel Han <danielhanchen@gmail.com>
Date: Thu, 21 Aug 2025 07:00:44 -0700
Subject: [PATCH 103/154] loader.py: set bfloat16 compute dtype on all weighted modules

---
 unsloth/models/loader.py | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/unsloth/models/loader.py b/unsloth/models/loader.py
index 889d170a17..3112f674fe 100644
--- a/unsloth/models/loader.py
+++ b/unsloth/models/loader.py
@@ -641,6 +641,9 @@ def from_pretrained(
                     "torch.amax(dequantize_module_weight(module)) >= 0:"\
                     "module._pre_set_compute_dtype = torch.bfloat16\n"\
                     "\n"\
+                    "if hasattr(module, 'weight'):"\
+                    "module._pre_set_compute_dtype = torch.bfloat16\n"\
+                    "\n"\
                     "if ('mlp.router' in name) and hasattr(module, 'weight'):"\
                     "module._pre_set_compute_dtype = torch.bfloat16\n"\
                     "\n"\

From 2e38e8a9b9e46b5bb4bf026dfff677728d662297 Mon Sep 17 00:00:00 2001
From: Daniel Han <danielhanchen@gmail.com>
Date: Fri, 22 Aug 2025 03:42:08 -0700
Subject: [PATCH 104/154] loader.py: add debug prints of config torch_dtype/dtype

---
 unsloth/models/loader.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/unsloth/models/loader.py b/unsloth/models/loader.py
index 3112f674fe..9ae1448762 100644
--- a/unsloth/models/loader.py
+++ b/unsloth/models/loader.py
@@ -897,6 +897,8 @@ def from_pretrained(
 
         if load_in_4bit:
             # Fix up bitsandbytes config
+            print("torch_dtype", model.config.to_dict().get("torch_dtype"))
+            print("dtype", model.config.to_dict().get("dtype"))
             quantization_config = \
             {
                 # Sometimes torch_dtype is not a string!!

From 8b3a8bacf4a19133d9d4952fad7fd65d437861a8 Mon Sep 17 00:00:00 2001
From: Daniel Han <danielhanchen@gmail.com>
Date: Fri, 22 Aug 2025 03:44:29 -0700
Subject: [PATCH 105/154] loader.py: read config dtype, falling back to torch_dtype

---
 unsloth/models/loader.py | 10 ++++++----
 1 file changed, 6 insertions(+), 4 deletions(-)

diff --git a/unsloth/models/loader.py b/unsloth/models/loader.py
index 9ae1448762..1b3b2d6011 100644
--- a/unsloth/models/loader.py
+++ b/unsloth/models/loader.py
@@ -436,10 +436,12 @@ def from_pretrained(
 
         if load_in_4bit:
             # Fix up bitsandbytes config
+            config = model.config.to_dict()
+            torch_dtype = config.get("dtype") or config.get("torch_dtype")
             quantization_config = \
             {
                 # Sometimes torch_dtype is not a string!!
-                "bnb_4bit_compute_dtype"           : model.config.to_dict()["torch_dtype"],
+                "bnb_4bit_compute_dtype"           : torch_dtype,
                 "bnb_4bit_quant_type"              : "nf4",
                 "bnb_4bit_use_double_quant"        : True,
                 "llm_int8_enable_fp32_cpu_offload" : False,
@@ -897,12 +899,12 @@ def from_pretrained(
 
         if load_in_4bit:
             # Fix up bitsandbytes config
-            print("torch_dtype", model.config.to_dict().get("torch_dtype"))
-            print("dtype", model.config.to_dict().get("dtype"))
+            config = model.config.to_dict()
+            torch_dtype = config.get("dtype") or config.get("torch_dtype")
             quantization_config = \
             {
                 # Sometimes torch_dtype is not a string!!
-                "bnb_4bit_compute_dtype"           : model.config.to_dict()["torch_dtype"],
+                "bnb_4bit_compute_dtype"           : torch_dtype,
                 "bnb_4bit_quant_type"              : "nf4",
                 "bnb_4bit_use_double_quant"        : True,
                 "llm_int8_enable_fp32_cpu_offload" : False,

From f706d20e56924bdb26190625ebb66bac4eaa63d6 Mon Sep 17 00:00:00 2001
From: Daniel Han <danielhanchen@gmail.com>
Date: Fri, 22 Aug 2025 03:59:09 -0700
Subject: [PATCH 106/154] torch_dtype

---
 unsloth/models/vision.py | 19 ++++++++++++++-----
 unsloth/save.py          | 15 +++++++++++----
 2 files changed, 25 insertions(+), 9 deletions(-)

diff --git a/unsloth/models/vision.py b/unsloth/models/vision.py
index 486a049339..fc31032594 100644
--- a/unsloth/models/vision.py
+++ b/unsloth/models/vision.py
@@ -73,6 +73,9 @@
 PROMPT_LOOPKUP = dict()
 
 from transformers import GenerationConfig, CompileConfig, HybridCache
+from transformers import PretrainedConfig
+HAS_TORCH_DTYPE = "torch_dtype" in PretrainedConfig.__doc__
+
 _compile_config = CompileConfig(
     fullgraph = False,
     dynamic = None,
@@ -118,7 +121,7 @@ def unsloth_base_fast_generate(
     bsz = input_ids.shape[0]
 
     FastBaseModel.for_inference(self)
-    dtype = _get_dtype(self.config.torch_dtype)
+    dtype = _get_dtype(getattr(self.config, "dtype", None) or getattr(self.config, "torch_dtype", None))
 
     # Check if VLM
     is_vlm = any(
@@ -246,8 +249,6 @@ def unsloth_base_fast_generate(
     return output
 pass
 
-global partial_model
-
 class FastBaseModel:
 
     @staticmethod
@@ -443,11 +444,17 @@ def from_pretrained(
         torch_dtype = dtype
         if do_forced_float32: torch_dtype = torch.bfloat16
 
+        if HAS_TORCH_DTYPE:
+            kwargs["torch_dtype"] = torch_dtype
+        else:
+            # Transformers removed torch_dtype
+            kwargs["dtype"] = torch_dtype
+
         raise_handler = RaiseUninitialized()
         model = auto_model.from_pretrained(
             model_name,
             device_map              = device_map,
-            torch_dtype             = torch_dtype,
+            # torch_dtype           = torch_dtype, # Transformers removed torch_dtype
             # quantization_config   = bnb_config,
             token                   = token,
             trust_remote_code       = trust_remote_code,
@@ -698,7 +705,9 @@ def post_patch_model(
         full_finetuning = os.environ.get("UNSLOTH_ENABLE_FULL_FINETUNING", "0") == "1"
 
         float32_mixed_precision = True
-        if _get_dtype(model.config.torch_dtype) == torch.bfloat16 and full_finetuning:
+        if _get_dtype(
+                getattr(model.config, "dtype", None) or getattr(model.config, "torch_dtype", None)
+            ) == torch.bfloat16 and full_finetuning:
             # Use bfloat16 precision for full finetuning
             float32_mixed_precision = False
 
diff --git a/unsloth/save.py b/unsloth/save.py
index 9539b66701..4535c7dc42 100644
--- a/unsloth/save.py
+++ b/unsloth/save.py
@@ -549,11 +549,14 @@ def unsloth_save_model(
     from collections import OrderedDict
     state_dict = OrderedDict()
 
-    torch_dtype = internal_model.config.torch_dtype
+    torch_dtype = \
+        getattr(internal_model.config, "dtype", None) or \
+        getattr(internal_model.config, "torch_dtype", None)
     if type(torch_dtype) is str:
         if   torch_dtype ==  "float16": torch_dtype = torch.float16
         elif torch_dtype == "bfloat16": torch_dtype = torch.bfloat16
-    pass
+    else:
+        torch_dtype = internal_model.model.embed_tokens.weight.dtype
 
     # Check modules to save float32 dtype
     state_dict["model.embed_tokens.weight"] = internal_model.model.embed_tokens.weight.data.to(torch_dtype)
@@ -1880,7 +1883,9 @@ def unsloth_save_pretrained_gguf(
     for _ in range(3):
         gc.collect()
 
-    model_dtype = self.config.torch_dtype
+    model_dtype = \
+        getattr(self.config, "dtype", None) or \
+        getattr(self.config, "torch_dtype", None)
     model_type  = self.config.model_type
     if type(model_dtype) is str:
         assert(model_dtype == "float16" or model_dtype == "bfloat16")
@@ -2058,7 +2063,9 @@ def unsloth_push_to_hub_gguf(
     for _ in range(3):
         gc.collect()
 
-    model_dtype = self.config.torch_dtype
+    model_dtype = \
+        getattr(self.config, "dtype", None) or \
+        getattr(self.config, "torch_dtype", None)
     model_type  = self.config.model_type
     if type(model_dtype) is str:
         assert(model_dtype == "float16" or model_dtype == "bfloat16")

From b56cc1b82cfb64a02bbe7a12afd1c05eaa4bf53d Mon Sep 17 00:00:00 2001
From: Daniel Han <danielhanchen@gmail.com>
Date: Thu, 4 Sep 2025 03:33:54 -0700
Subject: [PATCH 107/154] rl.py: set auto_find_batch_size to False

---
 unsloth/models/rl.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/unsloth/models/rl.py b/unsloth/models/rl.py
index 0f1fa2dbf6..b1ab96c840 100644
--- a/unsloth/models/rl.py
+++ b/unsloth/models/rl.py
@@ -513,7 +513,7 @@ def _patch_trl_rl_trainers(trainer_file = "grpo_trainer"):
         "fp16"                          : False,
         "include_tokens_per_second"     : False,
         "include_num_input_tokens_seen" : False,
-        "auto_find_batch_size"          : True, # Auto /2 batch size
+        "auto_find_batch_size"          : False, # Auto /2 batch size - too many people complained so removing
         "dataloader_pin_memory"         : True,
         # Might fail so disable for now
         # "dataloader_persistent_workers" : True, # Keeps dataloader in RAM

From c47f9367f53c0495bace2aa145252955d620aa78 Mon Sep 17 00:00:00 2001
From: Daniel Han <danielhanchen@gmail.com>
Date: Thu, 4 Sep 2025 03:55:38 -0700
Subject: [PATCH 108/154] Fix CE Loss

---
 unsloth/models/llama.py   |  4 ++--
 unsloth/models/mistral.py | 29 +++++++++++++++++++++--------
 2 files changed, 23 insertions(+), 10 deletions(-)

diff --git a/unsloth/models/llama.py b/unsloth/models/llama.py
index cf2ca75f75..f978060c9c 100644
--- a/unsloth/models/llama.py
+++ b/unsloth/models/llama.py
@@ -1236,7 +1236,7 @@ def _CausalLM_fast_forward(
             # < 1024 Normal Unsloth uses less VRAM!
             if bsz*q_len <= 1024: RETURN_LOGITS = True
 
-            if not RETURN_LOGITS and HAS_CUT_CROSS_ENTROPY and labels is not None:
+            if not RETURN_LOGITS and labels is not None:
 
                 n_items = kwargs.get("num_items_in_batch", None) or kwargs.get("n_items", None)
 
@@ -1259,7 +1259,7 @@ def _CausalLM_fast_forward(
                     mask                 = None,
                     n_items              = n_items,
                     scaling              = getattr(self, "accelerator_scaler", None),
-                    target_gb            = 1,
+                    target_gb            = None,
                     torch_compile        = True,
                     logit_softcapping    = logit_softcapping,
                 )
diff --git a/unsloth/models/mistral.py b/unsloth/models/mistral.py
index 6274f2e5df..faab2d30b1 100644
--- a/unsloth/models/mistral.py
+++ b/unsloth/models/mistral.py
@@ -300,17 +300,30 @@ def MistralForCausalLM_fast_forward(
         # < 1024 Normal Unsloth uses less VRAM!
         if bsz * q_len <= 1024: RETURN_LOGITS = True
 
-        if not RETURN_LOGITS and HAS_CUT_CROSS_ENTROPY and os.environ.get("UNSLOTH_ENABLE_CCE", "1") != "0" and labels is not None:
+        if not RETURN_LOGITS and labels is not None:
             n_items = kwargs.get("num_items_in_batch", None) or kwargs.get("n_items", None)
             logit_softcapping = getattr(self.config, "final_logit_softcapping", 0)
-            loss = fused_linear_cross_entropy(
-                hidden_states = hidden_states,
-                lm_weight = lm_head,
-                labels = labels,
-                num_items_in_batch = n_items,
-                logit_softcapping = logit_softcapping,
-            )
 
+            # loss = fused_linear_cross_entropy(
+            #     hidden_states = hidden_states,
+            #     lm_weight = lm_head,
+            #     labels = labels,
+            #     num_items_in_batch = n_items,
+            #     logit_softcapping = logit_softcapping,
+            # )
+            loss = unsloth_fused_ce_loss(
+                trainer              = None,
+                hidden_states        = hidden_states,
+                lm_head_weight       = lm_head,
+                lm_head_bias         = None,
+                labels               = labels,
+                mask                 = None,
+                n_items              = n_items,
+                scaling              = getattr(self, "accelerator_scaler", None),
+                target_gb            = None,
+                torch_compile        = True,
+                logit_softcapping    = logit_softcapping,
+            )
             if not return_dict:
                 output = (logits,) + outputs[1:]
                 return (loss,) + output if loss is not None else output

From 0b896c5f93e10a24b6db32d96627bb4482ff7558 Mon Sep 17 00:00:00 2001
From: Daniel Han <danielhanchen@gmail.com>
Date: Thu, 4 Sep 2025 05:11:33 -0700
Subject: [PATCH 109/154] Versioning

---
 pyproject.toml      | 4 ++--
 unsloth/__init__.py | 2 +-
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/pyproject.toml b/pyproject.toml
index 8c60cb5866..160182c2a2 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -37,7 +37,7 @@ triton = [
 ]
 
 huggingface = [
-    "unsloth_zoo>=2025.8.9",
+    "unsloth_zoo>=2025.9.1",
     "packaging",
     "tyro",
     "transformers>=4.51.3,!=4.52.0,!=4.52.1,!=4.52.2,!=4.52.3,!=4.53.0,!=4.54.0,!=4.55.0,!=4.55.1",
@@ -453,7 +453,7 @@ colab-ampere-torch220 = [
     "flash-attn>=2.6.3",
 ]
 colab-new = [
-    "unsloth_zoo>=2025.8.9",
+    "unsloth_zoo>=2025.9.1",
     "packaging",
     "tyro",
     "transformers>=4.51.3,!=4.47.0,!=4.52.0,!=4.52.1,!=4.52.2,!=4.52.3,!=4.53.0,!=4.54.0,!=4.55.0,!=4.55.1",
diff --git a/unsloth/__init__.py b/unsloth/__init__.py
index 1b2a9310ff..25a54165b7 100644
--- a/unsloth/__init__.py
+++ b/unsloth/__init__.py
@@ -214,7 +214,7 @@ def is_bf16_supported(): return SUPPORTS_BFLOAT16
 # Check for unsloth_zoo
 try:
     unsloth_zoo_version = importlib_version("unsloth_zoo")
-    if Version(unsloth_zoo_version) < Version("2025.8.8"):
+    if Version(unsloth_zoo_version) < Version("2025.9.1"):
         print(
             "Unsloth: Please update Unsloth and Unsloth-Zoo to the latest version!\n"\
             "Do this via `pip install --upgrade --force-reinstall --no-cache-dir --no-deps unsloth unsloth_zoo`"

From 7234a62f5b40d2ee96e65570a8e7a769e5449271 Mon Sep 17 00:00:00 2001
From: Daniel Han <danielhanchen@gmail.com>
Date: Tue, 9 Sep 2025 01:59:13 -0700
Subject: [PATCH 110/154] loader.py: add debug print of model_name

---
 unsloth/models/loader.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/unsloth/models/loader.py b/unsloth/models/loader.py
index b1844a1472..952f900ff4 100644
--- a/unsloth/models/loader.py
+++ b/unsloth/models/loader.py
@@ -527,6 +527,7 @@ def from_pretrained(
         qat_scheme                 = None,
         *args, **kwargs,
     ):
+        print("model_name", model_name)
         if token is None: token = get_token()
         # Login to allow private models
         if token is not None:

From 68c1aba08999d4f8801cda2194bcab5234109f31 Mon Sep 17 00:00:00 2001
From: Daniel Han <danielhanchen@gmail.com>
Date: Tue, 9 Sep 2025 02:01:49 -0700
Subject: [PATCH 111/154] loader.py: add debug prints around PEFT base model lookup

---
 unsloth/models/loader.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/unsloth/models/loader.py b/unsloth/models/loader.py
index 952f900ff4..b689b1f3c2 100644
--- a/unsloth/models/loader.py
+++ b/unsloth/models/loader.py
@@ -254,7 +254,9 @@ def from_pretrained(
         # Get base model for PEFT:
         if is_peft:
             # Check base model again for PEFT
+            print("is_peft", model_name)
             model_name = peft_config.base_model_name_or_path
+            print("is_peft", model_name)
             if not use_exact_model_name:
                 model_name = get_model_name(model_name, load_in_4bit)
             model_config = AutoConfig.from_pretrained(

From 05fc2f2628b54ee2e867ff5c307abcfda7310cce Mon Sep 17 00:00:00 2001
From: Daniel Han <danielhanchen@gmail.com>
Date: Tue, 9 Sep 2025 04:31:12 -0700
Subject: [PATCH 112/154] extract_model_type_from_config

---
 unsloth/models/_utils.py | 33 ++++++++++++++++++++++++++++++++-
 unsloth/models/loader.py |  4 +---
 2 files changed, 33 insertions(+), 4 deletions(-)

diff --git a/unsloth/models/_utils.py b/unsloth/models/_utils.py
index 597ed0244b..0346ba13c1 100644
--- a/unsloth/models/_utils.py
+++ b/unsloth/models/_utils.py
@@ -12,12 +12,13 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-__version__ = "2025.9.2"
+__version__ = "2025.9.3"
 
 __all__ = [
     "SUPPORTS_BFLOAT16",
     "is_bfloat16_supported",
     "is_vLLM_available",
+    "extract_model_type_from_config",
 
     "prepare_model_for_kbit_training",
     "xformers",
@@ -1561,3 +1562,33 @@ def _prepare_model_for_qat(model: torch.nn.Module, qat_scheme: str) -> torch.nn.
     quantize_(model, QATConfig(base_config, step="prepare"), filter_fn=filter_fn)
     return model
 pass
+
+
+def extract_model_type_from_config(config):
+    """ Gets model_type from config file - can be PEFT or normal HF """
+    model_type = None
+    from peft import PeftConfig
+    if issubclass(type(config), PeftConfig):
+        model_type_list = re.finditer(r"transformers\.models\.([^\.]{2,})\.modeling_\1", str(config))
+        model_type_list = list(model_type_list)
+        # Use transformers.models.gpt_oss.modeling_gpt_oss
+        if len(model_type_list) != 0:
+            model_type = model_type_list[0].group(1)
+        elif hasattr(config, "auto_mapping"):
+            # Use GptOssForCausalLM
+            model_type = config.auto_mapping.get("base_model_class", None)
+            if model_type is None:
+                # Last resort use model name unsloth/gpt-oss-20b-unsloth-bnb-4bit
+                model_type = config.base_model_name_or_path
+                model_type = os.path.split(model_type)[-1]
+    else:
+        
+    if model_type is None:
+        raise TypeError(f"Unsloth: Cannot determine model type for config file: {str(config)}")
+
+    # Standardize model_type
+    model_type = model_type.lower()
+    model_type = model_type.replace("_", "-")
+    model_type = model_type.replace("/", "-")
+    return model_type
+pass
diff --git a/unsloth/models/loader.py b/unsloth/models/loader.py
index c0b996ae02..9c26c8834e 100644
--- a/unsloth/models/loader.py
+++ b/unsloth/models/loader.py
@@ -20,6 +20,7 @@
     HAS_FLASH_ATTENTION_SOFTCAPPING,
     USE_MODELSCOPE,
     get_transformers_model_type,
+    extract_model_type_from_config,
 )
 from .granite import FastGraniteModel
 from .llama   import FastLlamaModel, logger
@@ -254,9 +255,7 @@ def from_pretrained(
         # Get base model for PEFT:
         if is_peft:
             # Check base model again for PEFT
-            print("is_peft", model_name)
             model_name = peft_config.base_model_name_or_path
-            print("is_peft", model_name)
             if not use_exact_model_name:
                 model_name = get_model_name(model_name, load_in_4bit)
             model_config = AutoConfig.from_pretrained(
@@ -529,7 +528,6 @@ def from_pretrained(
         qat_scheme                 = None,
         *args, **kwargs,
     ):
-        print("model_name", model_name)
         if token is None: token = get_token()
         # Login to allow private models
         if token is not None:

From 99c7afb3fcc8aaa755dba2ad9f74140ff978028c Mon Sep 17 00:00:00 2001
From: Daniel Han <danielhanchen@gmail.com>
Date: Tue, 9 Sep 2025 21:51:46 -0700
Subject: [PATCH 113/154] Model types

---
 unsloth/models/_utils.py |  39 +++++++++++---
 unsloth/models/loader.py | 114 +++++++++++++++++++++------------------
 2 files changed, 92 insertions(+), 61 deletions(-)

diff --git a/unsloth/models/_utils.py b/unsloth/models/_utils.py
index 0346ba13c1..f961a49de5 100644
--- a/unsloth/models/_utils.py
+++ b/unsloth/models/_utils.py
@@ -1566,7 +1566,9 @@ def _prepare_model_for_qat(model: torch.nn.Module, qat_scheme: str) -> torch.nn.
 
 def extract_model_type_from_config(config):
     """ Gets model_type from config file - can be PEFT or normal HF """
-    model_type = None
+    if config is None:
+        raise TypeError(f"Unsloth: Cannot determine model type for config file: {str(config)}")
+    model_types = None
     from peft import PeftConfig
     if issubclass(type(config), PeftConfig):
         model_type_list = re.finditer(r"transformers\.models\.([^\.]{2,})\.modeling_\1", str(config))
@@ -1574,6 +1576,7 @@ def extract_model_type_from_config(config):
         # Use transformers.models.gpt_oss.modeling_gpt_oss
         if len(model_type_list) != 0:
             model_type = model_type_list[0].group(1)
+            model_types = [model_type]
         elif hasattr(config, "auto_mapping"):
             # Use GptOssForCausalLM
             model_type = config.auto_mapping.get("base_model_class", None)
@@ -1581,14 +1584,34 @@ def extract_model_type_from_config(config):
                 # Last resort use model name unsloth/gpt-oss-20b-unsloth-bnb-4bit
                 model_type = config.base_model_name_or_path
                 model_type = os.path.split(model_type)[-1]
+            model_types = [model_type]
     else:
-        
-    if model_type is None:
+        from collections.abc import Mapping, Sequence
+        def find_values(data, target_key):
+            stack = [data]
+            while stack:
+                obj = stack.pop()
+                if isinstance(obj, Mapping):
+                    # Emit values for matches
+                    if target_key in obj:
+                        yield obj[target_key]
+                    # Keep walking into nested values
+                    stack.extend(obj.values())
+                elif isinstance(obj, Sequence) and not isinstance(obj, (str, bytes, bytearray)):
+                    # Walk sequences (lists/tuples/sets), but not strings/bytes
+                    stack.extend(obj)
+        model_types = list(find_values(getattr(config, "to_dict", lambda *args, **kwargs: {})(), "model_type"))
+    pass
+    if model_types is None:
         raise TypeError(f"Unsloth: Cannot determine model type for config file: {str(config)}")
-
     # Standardize model_type
-    model_type = model_type.lower()
-    model_type = model_type.replace("_", "-")
-    model_type = model_type.replace("/", "-")
-    return model_type
+    final_model_types = []
+    for model_type in model_types:
+        model_type = model_type.lower()
+        model_type = model_type.replace("_", "")
+        model_type = model_type.replace("-", "")
+        model_type = model_type.replace("/", "")
+        model_type = model_type.replace(".", "")
+        final_model_types.append(model_type)
+    return tuple(sorted(final_model_types))
 pass
diff --git a/unsloth/models/loader.py b/unsloth/models/loader.py
index 9c26c8834e..6cefe33aaf 100644
--- a/unsloth/models/loader.py
+++ b/unsloth/models/loader.py
@@ -84,7 +84,8 @@
 global FORCE_FLOAT32
 FORCE_FLOAT32 = [
     "gemma3",
-    "gpt_oss",
+    "gemma3n",
+    "gptoss",
 ]
 
 class FastLanguageModel(FastLlamaModel):
@@ -178,6 +179,8 @@ def from_pretrained(
 
         autoconfig_error = None
         peft_error = None
+        model_config = None
+        peft_config = None
         try:
             model_config = AutoConfig.from_pretrained(
                 model_name,
@@ -201,8 +204,12 @@ def from_pretrained(
             peft_error = str(error)
             is_peft = False
         pass
-
-        # Both config.json and adapter_config.json should not exist!
+        model_types = extract_model_type_from_config(model_config or peft_config)
+        if len(model_types) == 1:
+            model_type = model_types[0]
+        else:
+            # Leave as tuple if more than one arch
+            model_type = model_types
 
         # Old transformers versions check
         both_exist = (is_model and is_peft) and not SUPPORTS_LLAMA32
@@ -267,8 +274,6 @@ def from_pretrained(
 
         if not was_disabled: enable_progress_bars()
 
-        model_type = model_config.model_type
-
         if model_type == "llama":
             scaling_type = None
             if getattr(model_config, "rope_scaling", None) is not None:
@@ -494,10 +499,11 @@ def from_pretrained(
     from transformers import AutoModelForVision2Seq
 pass
 
+# Must be alphabetically sorted for each entry
 DISABLE_COMPILE_MODEL_NAMES = [
-    "aya-vision",
+    "ayavision",
     "modernbert",
-    "granite-vision",
+    "granite,llavanext,siglipvisionmodel", # Granite-vision 3
 ]
 
 
@@ -574,20 +580,55 @@ def from_pretrained(
         if not use_exact_model_name:
             model_name = get_model_name(model_name, load_in_4bit)
 
+        # First check if it's a normal model via AutoConfig
+        from huggingface_hub.utils import disable_progress_bars, enable_progress_bars, are_progress_bars_disabled
+        was_disabled = are_progress_bars_disabled()
+        disable_progress_bars()
+
+        autoconfig_error = None
+        peft_error = None
+        model_config = None
+        peft_config = None
+        try:
+            model_config = AutoConfig.from_pretrained(
+                model_name,
+                token = token,
+                revision = revision,
+                trust_remote_code = trust_remote_code,
+            )
+            is_model = True
+        except Exception as error:
+            autoconfig_error = str(error)
+            is_model = False
+        try:
+            peft_config = PeftConfig.from_pretrained(
+                model_name,
+                token = token,
+                revision = revision,
+                trust_remote_code = trust_remote_code,
+            )
+            is_peft = True
+        except Exception as error:
+            peft_error = str(error)
+            is_peft = False
+        pass
+        model_types = extract_model_type_from_config(model_config or peft_config)
+        model_types_all = ",".join(model_types)
+
         # Check versions
         lowered_model_name = model_name.lower()
         os.environ["UNSLOTH_MODEL_NAME"] = lowered_model_name
         LATEST  = '\nPlease use transformers via `pip install --no-deps git+https://github.com/huggingface/transformers.git`'
         NIGHTLY = '\nPlease use nightly transformers via pip install --upgrade "transformers>=4.49.0"`'
         # Pixtral
-        if "pixtral" in lowered_model_name and transformers_version < Version("4.49.0"):
+        if "pixtral" in model_types_all and transformers_version < Version("4.49.0"):
             raise RuntimeError("Unsloth: Pixtral only works on transformers >= 4.49.0." + LATEST)
         # Qwen 2.5
-        elif "qwen2.5" in lowered_model_name and transformers_version < Version("4.49.0"):
+        elif "qwen25" in model_types_all and transformers_version < Version("4.49.0"):
             raise RuntimeError("Unsloth: Qwen 2.5 only works on transformers >= 4.49.0." + LATEST)
         # Gemma 3
-        elif "gemma-3" in lowered_model_name:
-            if "gemma-3n" in lowered_model_name:
+        elif "gemma3" in model_types_all:
+            if "gemma3n" in model_types_all:
                 if transformers_version < Version("4.53.0"):
                     raise RuntimeError("Unsloth: Gemma 3N only works on transformers >= 4.53.0" + LATEST)
                 os.environ["UNSLOTH_DISABLE_STATIC_GENERATION"] = "1"
@@ -605,10 +646,10 @@ def from_pretrained(
             # common in both gemma-3 and gemma-3n
             os.environ["UNSLOTH_HIGH_PRECISION_LAYERNORM"] = "1"
         # Cohere
-        elif "c4ai-command-a-03-2025" in lowered_model_name and transformers_version < Version("4.50.0.dev0"):
+        elif "cohere2" in model_types_all and transformers_version < Version("4.50.0.dev0"):
             raise RuntimeError("Unsloth: Cohere's Command model only works on transformers >= 4.50.0." + NIGHTLY)
         # Sesame
-        elif "csm-1b" in lowered_model_name:
+        elif "csm" in model_types_all:
             os.environ["UNSLOTH_COMPILE_DISABLE"] = "1" # Inference is too slow
             os.environ["UNSLOTH_DISABLE_STATIC_GENERATION"] = "1" # Sesame fails
             os.environ["UNSLOTH_FORCE_CUSTOM_DTYPE"] = \
@@ -616,14 +657,14 @@ def from_pretrained(
                 "if name.endswith(('_proj', 'fc1', 'fc2', 'codebook', 'head')): module.to(torch.float16)"\
                 ";"
         # Granite 4
-        elif 'granite-4' in lowered_model_name:
+        elif 'granitemoehybrid' in model_types_all:
             # Granite-4 rms norms are stored as 16 bit, but we upcast
             os.environ["UNSLOTH_HIGH_PRECISION_LAYERNORM"] = "1"
             os.environ["UNSLOTH_DISABLE_STATIC_GENERATION"] = "1"
         # Olmo 2
-        elif "olmo-2" in lowered_model_name and transformers_version < Version("4.50.0.dev0"):
+        elif "olmo2" in model_types_all and transformers_version < Version("4.50.0.dev0"):
             raise RuntimeError("Unsloth: OLMo-2 only works on transformers >= 4.50.0." + NIGHTLY)
-        elif "falcon-h1" in lowered_model_name:
+        elif "falconh1" in model_types_all:
             # Falcon must use float32 Triton ie TRITON_F32_DEFAULT = 'ieee'
             # since Mamba kernels error out on using lower precision
             os.environ["UNSLOTH_FORCE_CUSTOM_DTYPE"] = \
@@ -631,7 +672,7 @@ def from_pretrained(
                 "if name.endswith(('q_proj', 'k_proj', 'v_proj', 'o_proj', 'gate_proj', 'up_proj', 'down_proj', 'head')): module.to(torch.float16)"\
                 ";"\
                 "os.environ['TRITON_F32_DEFAULT'] = 'ieee'"
-        elif "gpt-oss" in lowered_model_name:
+        elif "gptoss" in model_types_all:
             os.environ["UNSLOTH_DISABLE_STATIC_GENERATION"] = "1"
             if not load_in_4bit:
                 # Only upcast MoE biases for MXFP4, not BnB
@@ -681,39 +722,6 @@ def from_pretrained(
             model_name = snapshot_download(model_name)
         pass
 
-        # First check if it's a normal model via AutoConfig
-        from huggingface_hub.utils import disable_progress_bars, enable_progress_bars, are_progress_bars_disabled
-        was_disabled = are_progress_bars_disabled()
-        disable_progress_bars()
-
-        autoconfig_error = None
-        peft_error = None
-        try:
-            model_config = AutoConfig.from_pretrained(
-                model_name,
-                token = token,
-                revision = revision,
-                trust_remote_code = trust_remote_code,
-            )
-            is_model = True
-        except Exception as error:
-            autoconfig_error = str(error)
-            is_model = False
-        try:
-            peft_config = PeftConfig.from_pretrained(
-                model_name,
-                token = token,
-                revision = revision,
-                trust_remote_code = trust_remote_code,
-            )
-            is_peft = True
-        except Exception as error:
-            peft_error = str(error)
-            is_peft = False
-        pass
-
-        # Both config.json and adapter_config.json should not exist!
-
         # Old transformers versions check
         both_exist = (is_model and is_peft) and not SUPPORTS_LLAMA32
 
@@ -799,8 +807,8 @@ def from_pretrained(
             if model_type_arch != "siglip": break
         global FORCE_FLOAT32
         for disable_name in FORCE_FLOAT32:
-            if (disable_name.lower() == model_type_arch.lower().replace("-", "_") or \
-                disable_name.lower() in model_name.lower()) and \
+            if (disable_name.lower() == model_type_arch.lower().replace("-", "").replace("_", "") or \
+                disable_name.lower() in model_types_all) and \
                 ((dtype == torch.float16) or not SUPPORTS_BFLOAT16):
                 os.environ["UNSLOTH_FORCE_FLOAT32"] = "1"
                 dtype = torch.bfloat16 # Change to bfloat16 loading
@@ -846,7 +854,7 @@ def from_pretrained(
             )
         pass
         # Fix SDPA
-        if "gemma-3n" in lowered_model_name:
+        if "gemma3n" in model_types_all:
             supports_sdpa = False
         pass
 

From fc5d91de3b2200e6a4a32e865c5f18272271de5a Mon Sep 17 00:00:00 2001
From: Daniel Han <danielhanchen@gmail.com>
Date: Tue, 9 Sep 2025 22:02:56 -0700
Subject: [PATCH 114/154] Update loader.py

---
 unsloth/models/loader.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/unsloth/models/loader.py b/unsloth/models/loader.py
index 6cefe33aaf..44a74601d9 100644
--- a/unsloth/models/loader.py
+++ b/unsloth/models/loader.py
@@ -205,6 +205,7 @@ def from_pretrained(
             is_peft = False
         pass
         model_types = extract_model_type_from_config(model_config or peft_config)
+        print("model_types", model_types)
         if len(model_types) == 1:
             model_type = model_types[0]
         else:
@@ -614,6 +615,7 @@ def from_pretrained(
         pass
         model_types = extract_model_type_from_config(model_config or peft_config)
         model_types_all = ",".join(model_types)
+        print("model_types", model_types)
 
         # Check versions
         lowered_model_name = model_name.lower()

From 702a9ead13538d5a930c9a2f644fb92671dd35f2 Mon Sep 17 00:00:00 2001
From: Daniel Han <danielhanchen@gmail.com>
Date: Tue, 9 Sep 2025 22:11:08 -0700
Subject: [PATCH 115/154] get_transformers_model_type

---
 unsloth/models/_utils.py | 54 ----------------------------------------
 unsloth/models/loader.py | 26 ++++++-------------
 2 files changed, 8 insertions(+), 72 deletions(-)

diff --git a/unsloth/models/_utils.py b/unsloth/models/_utils.py
index f961a49de5..56b98489f6 100644
--- a/unsloth/models/_utils.py
+++ b/unsloth/models/_utils.py
@@ -18,7 +18,6 @@
     "SUPPORTS_BFLOAT16",
     "is_bfloat16_supported",
     "is_vLLM_available",
-    "extract_model_type_from_config",
 
     "prepare_model_for_kbit_training",
     "xformers",
@@ -1562,56 +1561,3 @@ def _prepare_model_for_qat(model: torch.nn.Module, qat_scheme: str) -> torch.nn.
     quantize_(model, QATConfig(base_config, step="prepare"), filter_fn=filter_fn)
     return model
 pass
-
-
-def extract_model_type_from_config(config):
-    """ Gets model_type from config file - can be PEFT or normal HF """
-    if config is None:
-        raise TypeError(f"Unsloth: Cannot determine model type for config file: {str(config)}")
-    model_types = None
-    from peft import PeftConfig
-    if issubclass(type(config), PeftConfig):
-        model_type_list = re.finditer(r"transformers\.models\.([^\.]{2,})\.modeling_\1", str(config))
-        model_type_list = list(model_type_list)
-        # Use transformers.models.gpt_oss.modeling_gpt_oss
-        if len(model_type_list) != 0:
-            model_type = model_type_list[0].group(1)
-            model_types = [model_type]
-        elif hasattr(config, "auto_mapping"):
-            # Use GptOssForCausalLM
-            model_type = config.auto_mapping.get("base_model_class", None)
-            if model_type is None:
-                # Last resort use model name unsloth/gpt-oss-20b-unsloth-bnb-4bit
-                model_type = config.base_model_name_or_path
-                model_type = os.path.split(model_type)[-1]
-            model_types = [model_type]
-    else:
-        from collections.abc import Mapping, Sequence
-        def find_values(data, target_key):
-            stack = [data]
-            while stack:
-                obj = stack.pop()
-                if isinstance(obj, Mapping):
-                    # Emit values for matches
-                    if target_key in obj:
-                        yield obj[target_key]
-                    # Keep walking into nested values
-                    stack.extend(obj.values())
-                elif isinstance(obj, Sequence) and not isinstance(obj, (str, bytes, bytearray)):
-                    # Walk sequences (lists/tuples/sets), but not strings/bytes
-                    stack.extend(obj)
-        model_types = list(find_values(getattr(config, "to_dict", lambda *args, **kwargs: {})(), "model_type"))
-    pass
-    if model_types is None:
-        raise TypeError(f"Unsloth: Cannot determine model type for config file: {str(config)}")
-    # Standardize model_type
-    final_model_types = []
-    for model_type in model_types:
-        model_type = model_type.lower()
-        model_type = model_type.replace("_", "")
-        model_type = model_type.replace("-", "")
-        model_type = model_type.replace("/", "")
-        model_type = model_type.replace(".", "")
-        final_model_types.append(model_type)
-    return tuple(sorted(final_model_types))
-pass
diff --git a/unsloth/models/loader.py b/unsloth/models/loader.py
index 44a74601d9..7e8a32caa7 100644
--- a/unsloth/models/loader.py
+++ b/unsloth/models/loader.py
@@ -20,7 +20,6 @@
     HAS_FLASH_ATTENTION_SOFTCAPPING,
     USE_MODELSCOPE,
     get_transformers_model_type,
-    extract_model_type_from_config,
 )
 from .granite import FastGraniteModel
 from .llama   import FastLlamaModel, logger
@@ -204,8 +203,7 @@ def from_pretrained(
             peft_error = str(error)
             is_peft = False
         pass
-        model_types = extract_model_type_from_config(model_config or peft_config)
-        print("model_types", model_types)
+        model_types = get_transformers_model_type(model_config or peft_config)
         if len(model_types) == 1:
             model_type = model_types[0]
         else:
@@ -581,6 +579,12 @@ def from_pretrained(
         if not use_exact_model_name:
             model_name = get_model_name(model_name, load_in_4bit)
 
+        # Check modelscope
+        if USE_MODELSCOPE and not os.path.exists(model_name):
+            from modelscope import snapshot_download
+            model_name = snapshot_download(model_name)
+        pass
+
         # First check if it's a normal model via AutoConfig
         from huggingface_hub.utils import disable_progress_bars, enable_progress_bars, are_progress_bars_disabled
         was_disabled = are_progress_bars_disabled()
@@ -613,9 +617,8 @@ def from_pretrained(
             peft_error = str(error)
             is_peft = False
         pass
-        model_types = extract_model_type_from_config(model_config or peft_config)
+        model_types = get_transformers_model_type(model_config or peft_config)
         model_types_all = ",".join(model_types)
-        print("model_types", model_types)
 
         # Check versions
         lowered_model_name = model_name.lower()
@@ -719,11 +722,6 @@ def from_pretrained(
             os.environ["UNSLOTH_DISABLE_STATIC_GENERATION"] = "1"
         pass
 
-        if USE_MODELSCOPE and not os.path.exists(model_name):
-            from modelscope import snapshot_download
-            model_name = snapshot_download(model_name)
-        pass
-
         # Old transformers versions check
         both_exist = (is_model and is_peft) and not SUPPORTS_LLAMA32
 
@@ -793,15 +791,7 @@ def from_pretrained(
         else:
             redirector = contextlib.redirect_stdout(open(os.devnull, "w"))
 
-        # Get model types like Gemma3 etc
-        model_types = get_transformers_model_type(
-            model_name        = model_name,
-            token             = token,
-            revision          = revision,
-            trust_remote_code = trust_remote_code,
-        )
         model_types = ["siglip"] + model_types
-
         # Set forced float32 env flag
         os.environ["UNSLOTH_FORCE_FLOAT32"] = "0"
         do_forced_float32 = False

From 8ece4a6f915e27f536202017132d031094a518ac Mon Sep 17 00:00:00 2001
From: Daniel Han <danielhanchen@gmail.com>
Date: Tue, 9 Sep 2025 22:14:10 -0700
Subject: [PATCH 116/154] Update loader.py

---
 unsloth/models/loader.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/unsloth/models/loader.py b/unsloth/models/loader.py
index 7e8a32caa7..43c14050c2 100644
--- a/unsloth/models/loader.py
+++ b/unsloth/models/loader.py
@@ -204,6 +204,7 @@ def from_pretrained(
             is_peft = False
         pass
         model_types = get_transformers_model_type(model_config or peft_config)
+        print("model_types", model_types)
         if len(model_types) == 1:
             model_type = model_types[0]
         else:
@@ -619,6 +620,7 @@ def from_pretrained(
         pass
         model_types = get_transformers_model_type(model_config or peft_config)
         model_types_all = ",".join(model_types)
+        print("model_types", model_types)
 
         # Check versions
         lowered_model_name = model_name.lower()

From f3ac0e3b6d382dd432af4a49c919e4d8a2700480 Mon Sep 17 00:00:00 2001
From: Daniel Han <danielhanchen@gmail.com>
Date: Tue, 9 Sep 2025 22:18:59 -0700
Subject: [PATCH 117/154] Update loader.py

---
 unsloth/models/loader.py | 14 ++++++--------
 1 file changed, 6 insertions(+), 8 deletions(-)

diff --git a/unsloth/models/loader.py b/unsloth/models/loader.py
index 43c14050c2..27fb3afe41 100644
--- a/unsloth/models/loader.py
+++ b/unsloth/models/loader.py
@@ -84,7 +84,7 @@
 FORCE_FLOAT32 = [
     "gemma3",
     "gemma3n",
-    "gptoss",
+    "gpt_oss",
 ]
 
 class FastLanguageModel(FastLlamaModel):
@@ -204,7 +204,6 @@ def from_pretrained(
             is_peft = False
         pass
         model_types = get_transformers_model_type(model_config or peft_config)
-        print("model_types", model_types)
         if len(model_types) == 1:
             model_type = model_types[0]
         else:
@@ -501,9 +500,9 @@ def from_pretrained(
 
 # Must be alphabetically sorted for each entry
 DISABLE_COMPILE_MODEL_NAMES = [
-    "ayavision",
+    "aya_vision",
     "modernbert",
-    "granite,llavanext,siglipvisionmodel", # Granite-vision 3
+    "granite,llava_next", # Granite-vision 3
 ]
 
 
@@ -620,7 +619,6 @@ def from_pretrained(
         pass
         model_types = get_transformers_model_type(model_config or peft_config)
         model_types_all = ",".join(model_types)
-        print("model_types", model_types)
 
         # Check versions
         lowered_model_name = model_name.lower()
@@ -631,7 +629,7 @@ def from_pretrained(
         if "pixtral" in model_types_all and transformers_version < Version("4.49.0"):
             raise RuntimeError("Unsloth: Pixtral only works on transformers >= 4.49.0." + LATEST)
         # Qwen 2.5
-        elif "qwen25" in model_types_all and transformers_version < Version("4.49.0"):
+        elif "qwen2_5" in model_types_all and transformers_version < Version("4.49.0"):
             raise RuntimeError("Unsloth: Qwen 2.5 only works on transformers >= 4.49.0." + LATEST)
         # Gemma 3
         elif "gemma3" in model_types_all:
@@ -671,7 +669,7 @@ def from_pretrained(
         # Olmo 2
         elif "olmo2" in model_types_all and transformers_version < Version("4.50.0.dev0"):
             raise RuntimeError("Unsloth: OLMo-2 only works on transformers >= 4.50.0." + NIGHTLY)
-        elif "falconh1" in model_types_all:
+        elif "falcon_h1" in model_types_all:
             # Falcon must use float32 Triton ie TRITON_F32_DEFAULT = 'ieee'
             # since Mamba kernels error out on using lower precision
             os.environ["UNSLOTH_FORCE_CUSTOM_DTYPE"] = \
@@ -679,7 +677,7 @@ def from_pretrained(
                 "if name.endswith(('q_proj', 'k_proj', 'v_proj', 'o_proj', 'gate_proj', 'up_proj', 'down_proj', 'head')): module.to(torch.float16)"\
                 ";"\
                 "os.environ['TRITON_F32_DEFAULT'] = 'ieee'"
-        elif "gptoss" in model_types_all:
+        elif "gpt_oss" in model_types_all:
             os.environ["UNSLOTH_DISABLE_STATIC_GENERATION"] = "1"
             if not load_in_4bit:
                 # Only upcast MoE biases for MXFP4, not BnB

From d2b0d4193a6e32cf370f2008d8ad05011a6ad0a6 Mon Sep 17 00:00:00 2001
From: Daniel Han <danielhanchen@gmail.com>
Date: Tue, 9 Sep 2025 22:22:15 -0700
Subject: [PATCH 118/154] Update loader.py

---
 unsloth/models/loader.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/unsloth/models/loader.py b/unsloth/models/loader.py
index 27fb3afe41..de2f32f9af 100644
--- a/unsloth/models/loader.py
+++ b/unsloth/models/loader.py
@@ -809,7 +809,7 @@ def from_pretrained(
         # Patch gradient checkpointing
         if use_gradient_checkpointing == "unsloth":
             patch_unsloth_smart_gradient_checkpointing(dtype = dtype)
-
+        print(model_types)
         with redirector:
             patch_loss_functions(torch_compile = False)
             model_types, supports_sdpa = unsloth_compile_transformers(

From e5920fe7027e7caf8602fc9a7d602a84ef197bed Mon Sep 17 00:00:00 2001
From: Daniel Han <danielhanchen@gmail.com>
Date: Wed, 10 Sep 2025 01:21:49 -0700
Subject: [PATCH 119/154] Update rl.py

---
 unsloth/models/rl.py | 42 ++++++++++++++++++++++--------------------
 1 file changed, 22 insertions(+), 20 deletions(-)

diff --git a/unsloth/models/rl.py b/unsloth/models/rl.py
index f342a4d86b..14b75f6746 100644
--- a/unsloth/models/rl.py
+++ b/unsloth/models/rl.py
@@ -44,6 +44,8 @@
 }
 
 from trl import __version__ as trl_version
+from unsloth_zoo.utils import Version
+trl_version = Version(trl_version)
 
 def vLLMSamplingParams(**kwargs):
     from vllm import SamplingParams
@@ -804,7 +806,7 @@ def patch_functions(RLTrainer, trainer_file, RLTrainer_name, all_imports, import
             " " * 12 + "if (getattr(args, 'use_vllm', False) == False):\n" + \
             " " * 16 + "args.use_vllm = True\n"
 
-            if "grpo" in trainer_file and trl_version >= "0.18":
+            if "grpo" in trainer_file and trl_version >= Version("0.18.0"):
                 # If model has vllm_engine, then use vllm in colocate mode. Donot wait for server
                 vllm_setter += \
                 " " * 12 + "args.vllm_mode='colocate'\n"
@@ -850,26 +852,27 @@ def patch_functions(RLTrainer, trainer_file, RLTrainer_name, all_imports, import
                 sampling_params # Add spaces
 
             # count the indentation of last line of sampling_params.
-            last_line = sampling_params.split("\n")[-1]
-            last_prev_line = sampling_params.split("\n")[-2]
-            last_prev_indentation = len(last_prev_line) - len(last_prev_line.lstrip())
-            last_indentation = len(last_line) - len(last_line.lstrip())
-
-
-            # Add extra arguments to SamplingParams
-            extra = "**getattr(getattr(args, 'vllm_sampling_params', vLLMSamplingParams()), '_set_kwargs', {})"
-            # Backwards replace
-            to_replace = ",\n" + " "*last_prev_indentation + extra + ",\n" + " "*last_indentation + ")"
-            sampling_params = to_replace.join(sampling_params.rsplit(")", 1))
-            # Strip multiple commas
-            sampling_params = re.sub(r"[\,][\s]{0,}\,", ",", sampling_params)
-
-            new_vllm_part = \
-                f"\n{' '*8}if {args}.use_vllm:\n{sampling_params}"\
-                f"\n{' '*8}else:\n"
+            splitted_sampling_params = sampling_params.split("\n")
+            if len(splitted_sampling_params) >= 2:
+                last_line = splitted_sampling_params[-1]
+                last_prev_line = splitted_sampling_params[-2]
+                last_prev_indentation = len(last_prev_line) - len(last_prev_line.lstrip())
+                last_indentation = len(last_line) - len(last_line.lstrip())
+
+                # Add extra arguments to SamplingParams
+                extra = "**getattr(getattr(args, 'vllm_sampling_params', vLLMSamplingParams()), '_set_kwargs', {})"
+                # Backwards replace
+                to_replace = ",\n" + " "*last_prev_indentation + extra + ",\n" + " "*last_indentation + ")"
+                sampling_params = to_replace.join(sampling_params.rsplit(")", 1))
+                # Strip multiple commas
+                sampling_params = re.sub(r"[\,][\s]{0,}\,", ",", sampling_params)
+
+                new_vllm_part = \
+                    f"\n{' '*8}if {args}.use_vllm:\n{sampling_params}"\
+                    f"\n{' '*8}else:\n"
         pass
 
-        if trl_version >= "0.18":
+        if trl_version >= Version("0.18.0"):
             # Replace LLM init with already existing vLLM engine for colocate mode
             vllm_llm_init_pattern = r"self\.llm\s*=\s*LLM\(.*?\)*\)\s*?\n(?!,)"
             vllm_llm_replacement = "self.llm = model.vllm_engine\n"
@@ -881,7 +884,6 @@ def patch_functions(RLTrainer, trainer_file, RLTrainer_name, all_imports, import
             )
 
         init = init.replace(vllm_part, new_vllm_part)
-
     pass
 
     # Search for vLLM calling in all child functions

From bf0367eb45dc731104968052415184b8e2d080dc Mon Sep 17 00:00:00 2001
From: Daniel Han <danielhanchen@gmail.com>
Date: Wed, 10 Sep 2025 01:24:02 -0700
Subject: [PATCH 120/154] Update pyproject.toml

---
 pyproject.toml | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/pyproject.toml b/pyproject.toml
index c2cb87ce3b..c860a92db6 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -37,7 +37,7 @@ triton = [
 ]
 
 huggingface = [
-    "unsloth_zoo>=2025.9.3",
+    "unsloth_zoo>=2025.9.4",
     "packaging",
     "tyro",
     "transformers>=4.51.3,!=4.52.0,!=4.52.1,!=4.52.2,!=4.52.3,!=4.53.0,!=4.54.0,!=4.55.0,!=4.55.1",
@@ -453,7 +453,7 @@ colab-ampere-torch220 = [
     "flash-attn>=2.6.3",
 ]
 colab-new = [
-    "unsloth_zoo>=2025.9.3",
+    "unsloth_zoo>=2025.9.4",
     "packaging",
     "tyro",
     "transformers>=4.51.3,!=4.47.0,!=4.52.0,!=4.52.1,!=4.52.2,!=4.52.3,!=4.53.0,!=4.54.0,!=4.55.0,!=4.55.1",

From d2c2cc195a99b6b4dbeab7b6f65d1b302b7a9591 Mon Sep 17 00:00:00 2001
From: Daniel Han <danielhanchen@gmail.com>
Date: Wed, 10 Sep 2025 01:26:58 -0700
Subject: [PATCH 121/154] Update loader.py

---
 unsloth/models/loader.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/unsloth/models/loader.py b/unsloth/models/loader.py
index de2f32f9af..a57deef000 100644
--- a/unsloth/models/loader.py
+++ b/unsloth/models/loader.py
@@ -809,7 +809,6 @@ def from_pretrained(
         # Patch gradient checkpointing
         if use_gradient_checkpointing == "unsloth":
             patch_unsloth_smart_gradient_checkpointing(dtype = dtype)
-        print(model_types)
         with redirector:
             patch_loss_functions(torch_compile = False)
             model_types, supports_sdpa = unsloth_compile_transformers(

From 35ca1776b08f81f05e16e268f09cb444f1af1e1b Mon Sep 17 00:00:00 2001
From: Daniel Han <danielhanchen@gmail.com>
Date: Fri, 12 Sep 2025 18:53:46 -0700
Subject: [PATCH 122/154] Update loader.py

---
 unsloth/models/loader.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/unsloth/models/loader.py b/unsloth/models/loader.py
index a57deef000..5ad283d39a 100644
--- a/unsloth/models/loader.py
+++ b/unsloth/models/loader.py
@@ -204,6 +204,7 @@ def from_pretrained(
             is_peft = False
         pass
         model_types = get_transformers_model_type(model_config or peft_config)
+        print("207", model_types_all)
         if len(model_types) == 1:
             model_type = model_types[0]
         else:
@@ -619,6 +620,7 @@ def from_pretrained(
         pass
         model_types = get_transformers_model_type(model_config or peft_config)
         model_types_all = ",".join(model_types)
+        print("623", model_types_all)
 
         # Check versions
         lowered_model_name = model_name.lower()

From 2eaf868efa817657405b4b67416b91be171b6285 Mon Sep 17 00:00:00 2001
From: Daniel Han <danielhanchen@gmail.com>
Date: Fri, 12 Sep 2025 18:55:47 -0700
Subject: [PATCH 123/154] Update loader.py

---
 unsloth/models/loader.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/unsloth/models/loader.py b/unsloth/models/loader.py
index 5ad283d39a..fd41390889 100644
--- a/unsloth/models/loader.py
+++ b/unsloth/models/loader.py
@@ -204,7 +204,7 @@ def from_pretrained(
             is_peft = False
         pass
         model_types = get_transformers_model_type(model_config or peft_config)
-        print("207", model_types_all)
+        print("207", model_types)
         if len(model_types) == 1:
             model_type = model_types[0]
         else:

From 7c892e798fa9ff71f25185ad5e4fb353f3b1a7e6 Mon Sep 17 00:00:00 2001
From: Daniel Han <danielhanchen@gmail.com>
Date: Sat, 13 Sep 2025 02:21:02 -0700
Subject: [PATCH 124/154] Update loader.py

---
 unsloth/models/loader.py | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/unsloth/models/loader.py b/unsloth/models/loader.py
index fd41390889..ab258f3ed9 100644
--- a/unsloth/models/loader.py
+++ b/unsloth/models/loader.py
@@ -204,7 +204,6 @@ def from_pretrained(
             is_peft = False
         pass
         model_types = get_transformers_model_type(model_config or peft_config)
-        print("207", model_types)
         if len(model_types) == 1:
             model_type = model_types[0]
         else:
@@ -620,11 +619,11 @@ def from_pretrained(
         pass
         model_types = get_transformers_model_type(model_config or peft_config)
         model_types_all = ",".join(model_types)
-        print("623", model_types_all)
 
         # Check versions
         lowered_model_name = model_name.lower()
-        os.environ["UNSLOTH_MODEL_NAME"] = lowered_model_name
+        if os.environ.get("UNSLOTH_MODEL_NAME", "") == "":
+            os.environ["UNSLOTH_MODEL_NAME"] = lowered_model_name
         LATEST  = '\nPlease use transformers via `pip install --no-deps git+https://github.com/huggingface/transformers.git`'
         NIGHTLY = '\nPlease use nightly transformers via pip install --upgrade "transformers>=4.49.0"`'
         # Pixtral

From 72ff24c5ebff286427f46d47a46b82627533ed7f Mon Sep 17 00:00:00 2001
From: Daniel Han <danielhanchen@gmail.com>
Date: Sat, 13 Sep 2025 21:15:04 -0700
Subject: [PATCH 125/154] Versioning

---
 pyproject.toml           | 4 ++--
 unsloth/__init__.py      | 2 +-
 unsloth/models/_utils.py | 2 +-
 3 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/pyproject.toml b/pyproject.toml
index d77683c00a..8df936f807 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -37,7 +37,7 @@ triton = [
 ]
 
 huggingface = [
-    "unsloth_zoo>=2025.9.5",
+    "unsloth_zoo>=2025.9.6",
     "packaging",
     "tyro",
     "transformers>=4.51.3,!=4.52.0,!=4.52.1,!=4.52.2,!=4.52.3,!=4.53.0,!=4.54.0,!=4.55.0,!=4.55.1",
@@ -453,7 +453,7 @@ colab-ampere-torch220 = [
     "flash-attn>=2.6.3",
 ]
 colab-new = [
-    "unsloth_zoo>=2025.9.5",
+    "unsloth_zoo>=2025.9.6",
     "packaging",
     "tyro",
     "transformers>=4.51.3,!=4.47.0,!=4.52.0,!=4.52.1,!=4.52.2,!=4.52.3,!=4.53.0,!=4.54.0,!=4.55.0,!=4.55.1",
diff --git a/unsloth/__init__.py b/unsloth/__init__.py
index 8255e505a8..1be571b69b 100644
--- a/unsloth/__init__.py
+++ b/unsloth/__init__.py
@@ -240,7 +240,7 @@ def is_bf16_supported(): return SUPPORTS_BFLOAT16
 # Check for unsloth_zoo
 try:
     unsloth_zoo_version = importlib_version("unsloth_zoo")
-    if Version(unsloth_zoo_version) < Version("2025.9.5"):
+    if Version(unsloth_zoo_version) < Version("2025.9.6"):
         print(
             "Unsloth: Please update Unsloth and Unsloth-Zoo to the latest version!\n"\
             "Do this via `pip install --upgrade --force-reinstall --no-cache-dir --no-deps unsloth unsloth_zoo`"
diff --git a/unsloth/models/_utils.py b/unsloth/models/_utils.py
index e3ac56ac83..4cf34aa007 100644
--- a/unsloth/models/_utils.py
+++ b/unsloth/models/_utils.py
@@ -12,7 +12,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-__version__ = "2025.9.4"
+__version__ = "2025.9.5"
 
 __all__ = [
     "SUPPORTS_BFLOAT16",

From 227842c5b87203c7c4ff1c2fc76763c79f33493c Mon Sep 17 00:00:00 2001
From: Daniel Han <danielhanchen@gmail.com>
Date: Mon, 15 Sep 2025 00:00:15 -0700
Subject: [PATCH 126/154] Update _utils.py

---
 unsloth/models/_utils.py | 16 +++++++++++++++-
 1 file changed, 15 insertions(+), 1 deletion(-)

diff --git a/unsloth/models/_utils.py b/unsloth/models/_utils.py
index 4cf34aa007..707d7220b2 100644
--- a/unsloth/models/_utils.py
+++ b/unsloth/models/_utils.py
@@ -614,6 +614,18 @@ def _is_openai_available(): return False
 # Get Xformers
 try:
     from xformers import __version__ as xformers_version
+    # [TODO] Xformers does NOT work on RTX 50x (12), B200 (10), Jetson (11)
+    # See https://github.com/facebookresearch/xformers/issues/1329
+    # CUDA error (/workspace/xfrm2/third_party/flash-attention/hopper/flash_fwd_launch_template.h:188)
+    major_version, minor_version = torch.cuda.get_device_capability()
+    if (
+        f"{major_version}.{minor_version}" in ("10.0", "11.0", "12.0")) and \
+        (xformers_version in (Version("0.0.32.post2"),)
+    ):
+        raise NotImplementedError(
+            "Unsloth: Xformers does not work in RTX 50X, Blackwell GPUs as of yet."
+        )
+    pass
     # Temporarily disable 0.0.27 and higher - inference issues
     if False: #Version(xformers_version) >= Version("0.0.27"):
         raise ImportError(
@@ -661,7 +673,9 @@ def _is_openai_available(): return False
     pass
     import xformers.ops.fmha as xformers
     xformers_attention = xformers.memory_efficient_attention
-except:
+except Exception as e:
+    print("========\nSwitching to SDPA PyTorch native attention which is slightly slower.\n========\n")
+    print(str(e))
     xformers = None
     xformers_attention = None
     xformers_version = None

From 505ae67fe77b77c04faa7cfb3284fd25441b5ade Mon Sep 17 00:00:00 2001
From: Daniel Han <danielhanchen@gmail.com>
Date: Mon, 15 Sep 2025 00:03:50 -0700
Subject: [PATCH 127/154] Update _utils.py

---
 unsloth/models/_utils.py | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/unsloth/models/_utils.py b/unsloth/models/_utils.py
index 707d7220b2..3878367650 100644
--- a/unsloth/models/_utils.py
+++ b/unsloth/models/_utils.py
@@ -84,7 +84,7 @@
 from unsloth_zoo.utils import Version
 from importlib.metadata import version as importlib_version
 from unsloth import DEVICE_TYPE, DEVICE_COUNT
-
+from unsloth_zoo.log import logger
 from unsloth_zoo.tokenizer_utils import (
     patch_tokenizer as _patch_tokenizer,
 )
@@ -608,8 +608,6 @@ def _is_openai_available(): return False
 elif DEVICE_TYPE == "xpu":
     SUPPORTS_BFLOAT16 = True
 
-from transformers.models.llama.modeling_llama import logger
-
 # =============================================
 # Get Xformers
 try:

From 80465dcabe0bd75dc8b43fddf3d8d672608fd087 Mon Sep 17 00:00:00 2001
From: Daniel Han <danielhanchen@gmail.com>
Date: Mon, 15 Sep 2025 00:06:36 -0700
Subject: [PATCH 128/154] Update _utils.py

---
 unsloth/models/_utils.py | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/unsloth/models/_utils.py b/unsloth/models/_utils.py
index 3878367650..2abc6b269b 100644
--- a/unsloth/models/_utils.py
+++ b/unsloth/models/_utils.py
@@ -617,13 +617,12 @@ def _is_openai_available(): return False
     # CUDA error (/workspace/xfrm2/third_party/flash-attention/hopper/flash_fwd_launch_template.h:188)
     major_version, minor_version = torch.cuda.get_device_capability()
     if (
-        f"{major_version}.{minor_version}" in ("10.0", "11.0", "12.0")) and \
-        (xformers_version in (Version("0.0.32.post2"),)
+        (f"{major_version}.{minor_version}" in ("10.0", "11.0", "12.0")) and \
+        (Version(xformers_version) in (Version("0.0.32.post2"),))
     ):
         raise NotImplementedError(
             "Unsloth: Xformers does not work in RTX 50X, Blackwell GPUs as of yet."
         )
-    pass
     # Temporarily disable 0.0.27 and higher - inference issues
     if False: #Version(xformers_version) >= Version("0.0.27"):
         raise ImportError(

From 4150e081ada733352975234f5a42f97a696a53c3 Mon Sep 17 00:00:00 2001
From: Daniel Han <danielhanchen@gmail.com>
Date: Mon, 15 Sep 2025 01:21:43 -0700
Subject: [PATCH 129/154] Update _utils.py

---
 unsloth/models/_utils.py | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/unsloth/models/_utils.py b/unsloth/models/_utils.py
index 2abc6b269b..a559d34ca4 100644
--- a/unsloth/models/_utils.py
+++ b/unsloth/models/_utils.py
@@ -621,7 +621,11 @@ def _is_openai_available(): return False
         (Version(xformers_version) in (Version("0.0.32.post2"),))
     ):
         raise NotImplementedError(
-            "Unsloth: Xformers does not work in RTX 50X, Blackwell GPUs as of yet."
+            "Unsloth: Xformers does not work in RTX 50X, Blackwell GPUs as of yet. Please build from source via\n"\
+            "```\n"\
+            "pip install ninja\n"\
+            "pip install -v --no-build-isolation -U git+https://github.com/facebookresearch/xformers.git@main#egg=xformers\n"\
+            "```\n"
         )
     # Temporarily disable 0.0.27 and higher - inference issues
     if False: #Version(xformers_version) >= Version("0.0.27"):

From 032c2c840067870adbbba78ad3088ccd5e2ff849 Mon Sep 17 00:00:00 2001
From: Daniel Han <danielhanchen@gmail.com>
Date: Mon, 15 Sep 2025 22:52:32 -0700
Subject: [PATCH 130/154] Update vision.py

---
 unsloth/models/vision.py | 23 ++++++++---------------
 1 file changed, 8 insertions(+), 15 deletions(-)

diff --git a/unsloth/models/vision.py b/unsloth/models/vision.py
index 1451ed92cd..2c77169cb9 100644
--- a/unsloth/models/vision.py
+++ b/unsloth/models/vision.py
@@ -636,24 +636,17 @@ def get_peft_model(
                 torch.xpu.empty_cache()
         pass
         max_seq_length = model.max_seq_length
-        # if we pass loftq_config = None we will get an error
+        # If we pass loftq_config = None we will get an error
         loftq_config = validate_loftq_config(loftq_config, lora_dropout, bias, init_lora_weights, model)
-        lora_config_dict = {
-            "r"                 : r,
-            "lora_alpha"        : lora_alpha,
-            "target_modules"    : target_modules,
-            "target_parameters" : kwargs.get("target_parameters", None),
-            "lora_dropout"      : lora_dropout,
-            "bias"              : bias,
-            "task_type"         : task_type,
-            "modules_to_save"   : modules_to_save,
-            "use_rslora"        : use_rslora,
-            "init_lora_weights" : init_lora_weights,
-            "loftq_config"      : loftq_config,
-        }
+
+        # Get only allowed parameters for LoraConfig
+        local_variables = { **locals(), **kwargs, }
+        del local_variables["kwargs"]
+        allowed_parameters = inspect.signature(LoraConfig).parameters.keys()
         lora_config = LoraConfig(
-            **{k:v for k,v in lora_config_dict.items() if k in LoraConfig.__doc__},
+            **{ k : v for k, v in local_variables.items() if k in allowed_parameters },
         )
+        print(lora_config)
         model = prepare_model_for_kbit_training(
             model,
             use_gradient_checkpointing = use_gradient_checkpointing,

From b105aae096e46646bf9ea5b7e0f541cad981f066 Mon Sep 17 00:00:00 2001
From: Daniel Han <danielhanchen@gmail.com>
Date: Mon, 15 Sep 2025 23:00:14 -0700
Subject: [PATCH 131/154] Update vision.py

---
 unsloth/models/vision.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/unsloth/models/vision.py b/unsloth/models/vision.py
index 2c77169cb9..f8c0f866f9 100644
--- a/unsloth/models/vision.py
+++ b/unsloth/models/vision.py
@@ -646,7 +646,6 @@ def get_peft_model(
         lora_config = LoraConfig(
             **{ k : v for k, v in local_variables.items() if k in allowed_parameters },
         )
-        print(lora_config)
         model = prepare_model_for_kbit_training(
             model,
             use_gradient_checkpointing = use_gradient_checkpointing,

From 400df38fb04aaec151c1d5b1e0d2a1ac23ceca6f Mon Sep 17 00:00:00 2001
From: Daniel Han <danielhanchen@gmail.com>
Date: Tue, 16 Sep 2025 03:00:39 -0700
Subject: [PATCH 132/154] Fix DataParallel

---
 unsloth/models/llama.py | 7 +++++--
 unsloth/models/rl.py    | 6 ++++++
 2 files changed, 11 insertions(+), 2 deletions(-)

diff --git a/unsloth/models/llama.py b/unsloth/models/llama.py
index f7a53d05fd..e04ffd029e 100644
--- a/unsloth/models/llama.py
+++ b/unsloth/models/llama.py
@@ -1200,7 +1200,8 @@ def _CausalLM_fast_forward(
 
             if not RETURN_LOGITS and labels is not None:
 
-                n_items = kwargs.get("num_items_in_batch", None) or kwargs.get("n_items", None)
+                n_items = kwargs.get("num_items_in_batch", None)
+                if n_items is None: n_items = kwargs.get("n_items", None)
 
                 if self.config.model_type == "falcon_h1":
                     hidden_states = hidden_states * self.config.lm_head_multiplier
@@ -1264,12 +1265,14 @@ def _CausalLM_fast_forward(
             shift_labels[..., :-1] = labels[..., 1:]
             shift_labels[..., -1] = -100
             # shift_labels = torch.hstack((labels[..., 1:], self.extra_ignored_labels[:labels.shape[0]]))
+            n_items = kwargs.get("num_items_in_batch", None)
+            if n_items is None: n_items = kwargs.get("n_items", None)
             loss = fast_cross_entropy_loss(
                 logits = shift_logits,
                 labels = shift_labels,
                 logit_softcapping = logit_softcapping,
                 logit_scaling     = logit_scaling,
-                n_items           = kwargs.get("num_items_in_batch", None) or kwargs.get("n_items", None),
+                n_items           = n_items,
             )
         else:
             if logit_scaling != 0:
diff --git a/unsloth/models/rl.py b/unsloth/models/rl.py
index 53f5eee66c..9e940c763b 100644
--- a/unsloth/models/rl.py
+++ b/unsloth/models/rl.py
@@ -110,6 +110,7 @@ def generate_with_clone(*args, **kwargs):
 from contextlib import nullcontext
 from torch.nn import functional as F
 from transformers import DataCollatorForSeq2Seq, DataCollatorForLanguageModeling as TransformersDataCollatorForLanguageModeling
+from transformers.training_args import ParallelMode
 
 torch_compile_options = {{
     "epilogue_fusion"   : True,
@@ -160,6 +161,11 @@ def __init__({RLTrainer_arguments},
     ):
         if args is None: args = Unsloth{RLConfig_name}()
 {RLTrainer_extra_args}
+        # [TODO] Fix up DataParallel multiplying batch sizes
+        # [TODO] DDP works, but DP seems to not work? [TODO]
+        if getattr(args, "parallel_mode", None) == ParallelMode.NOT_DISTRIBUTED and args.n_gpu > 1:
+            if getattr(args, "_n_gpu", 1) != 1:
+                args._n_gpu = 1
         super().__init__({RLTrainer_call_args}{RLTrainer_kwargs})
 {RLTrainer_post}
 pass

From 809a8b3b206db30c676852af07270db8c44b7319 Mon Sep 17 00:00:00 2001
From: Daniel Han <danielhanchen@gmail.com>
Date: Tue, 16 Sep 2025 03:02:52 -0700
Subject: [PATCH 133/154] Update _utils.py

---
 unsloth/models/_utils.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/unsloth/models/_utils.py b/unsloth/models/_utils.py
index a559d34ca4..194d18771c 100644
--- a/unsloth/models/_utils.py
+++ b/unsloth/models/_utils.py
@@ -12,7 +12,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-__version__ = "2025.9.5"
+__version__ = "2025.9.6"
 
 __all__ = [
     "SUPPORTS_BFLOAT16",

From 3dcc0911eb5e5ae360456e281f3e9ca99c5f95b8 Mon Sep 17 00:00:00 2001
From: Daniel Han <danielhanchen@gmail.com>
Date: Wed, 17 Sep 2025 02:13:07 -0700
Subject: [PATCH 134/154] Update rl.py

---
 unsloth/models/rl.py | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/unsloth/models/rl.py b/unsloth/models/rl.py
index 3e2fcf22be..6f1f000e68 100644
--- a/unsloth/models/rl.py
+++ b/unsloth/models/rl.py
@@ -271,14 +271,17 @@ def _patch_trl_rl_trainers(trainer_file = "grpo_trainer"):
         "if not force_float32 and (float16 and use_bf16): raise TypeError('Unsloth: Model is in float16 precision but you want to use bfloat16 precision. Set fp16 to `True` and bf16 to `False`')\n"\
         "if not force_float32 and (not float16 and use_fp16): raise TypeError('Unsloth: Model is in bfloat16 precision but you want to use float16 precision. Set fp16 to `False` and bf16 to `True`')\n"\
         "if force_float32:\n"\
+        "    # Forced float32 training\n"\
         "    args.fp16 = False\n"\
         "    args.bf16 = False\n"\
         "    os.environ['ACCELERATE_MIXED_PRECISION'] = 'no'\n"\
         "elif (not use_bf16 and not use_fp16) and mixed_precision_dtype == 'float32':\n"\
+        "    # Mixed precision training\n"\
         "    args.fp16 = float16\n"\
         "    args.bf16 = not float16\n"\
         "    os.environ['ACCELERATE_MIXED_PRECISION'] = 'fp16' if float16 else 'bf16'\n"
         "elif mixed_precision_dtype == 'bfloat16':\n"\
+        "    # Both False since bfloat16 full finetuning doesn't do any autocasting.\n"\
         "    args.fp16 = False\n"\
         "    args.bf16 = False\n"\
         "    os.environ['ACCELERATE_MIXED_PRECISION'] = 'no'\n"

From 28b1d50016921db9ada7bcdcdb67c61b92c9f379 Mon Sep 17 00:00:00 2001
From: Daniel Han <danielhanchen@gmail.com>
Date: Wed, 17 Sep 2025 02:40:22 -0700
Subject: [PATCH 135/154] Update synthetic.py

---
 unsloth/dataprep/synthetic.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/unsloth/dataprep/synthetic.py b/unsloth/dataprep/synthetic.py
index 52c114fab6..60742b7fdc 100644
--- a/unsloth/dataprep/synthetic.py
+++ b/unsloth/dataprep/synthetic.py
@@ -28,6 +28,7 @@
     patch_vllm,
     delete_vllm,
 )
+from unsloth_zoo.log import logger 
 import numpy as np
 
 from .synthetic_configs import (
@@ -117,6 +118,7 @@ def __init__(
             else:
                 subprocess_commands += ["--" + flag, which,]
         pass
+        logger.info(subprocess_commands)
         vllm_process = subprocess.Popen(
             subprocess_commands,
             stdout = subprocess.PIPE,

From de162d3e2a724dd178d24961bd9b989a68b70f2d Mon Sep 17 00:00:00 2001
From: Daniel Han <danielhanchen@gmail.com>
Date: Wed, 17 Sep 2025 02:56:36 -0700
Subject: [PATCH 136/154] Update synthetic.py

---
 unsloth/dataprep/synthetic.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/unsloth/dataprep/synthetic.py b/unsloth/dataprep/synthetic.py
index 60742b7fdc..2cca155d6d 100644
--- a/unsloth/dataprep/synthetic.py
+++ b/unsloth/dataprep/synthetic.py
@@ -77,6 +77,7 @@ def __init__(
             return_args            = True,
             enable_lora            = False,
             use_bitsandbytes       = False,
+            compilation_config     = 3,
             **kwargs,
         )
         if "dtype" in engine_args:

From a507a7d82bb1792986ffaa99c9f10b4de7e6bba3 Mon Sep 17 00:00:00 2001
From: Daniel Han <danielhanchen@gmail.com>
Date: Wed, 17 Sep 2025 03:03:32 -0700
Subject: [PATCH 137/154] Update synthetic.py

---
 unsloth/dataprep/synthetic.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/unsloth/dataprep/synthetic.py b/unsloth/dataprep/synthetic.py
index 2cca155d6d..d52f1df373 100644
--- a/unsloth/dataprep/synthetic.py
+++ b/unsloth/dataprep/synthetic.py
@@ -99,7 +99,7 @@ def __init__(
         if "model"  in engine_args: del engine_args["model"]
         if "compilation_config" in engine_args:
             # Cannot parse in vllm serve
-            engine_args["compilation_config"] = 3
+            engine_args["compilation_config"] = "'" + str(engine_args["compilation_config"]) + "'"
 
         subprocess_commands = [
             "vllm", "serve", str(model_name),

From cda72638c333e653d1ac74df30a69b6abfbf3624 Mon Sep 17 00:00:00 2001
From: Daniel Han <danielhanchen@gmail.com>
Date: Wed, 17 Sep 2025 03:06:04 -0700
Subject: [PATCH 138/154] Update synthetic.py

---
 unsloth/dataprep/synthetic.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/unsloth/dataprep/synthetic.py b/unsloth/dataprep/synthetic.py
index d52f1df373..68dd475e59 100644
--- a/unsloth/dataprep/synthetic.py
+++ b/unsloth/dataprep/synthetic.py
@@ -99,7 +99,7 @@ def __init__(
         if "model"  in engine_args: del engine_args["model"]
         if "compilation_config" in engine_args:
             # Cannot parse in vllm serve
-            engine_args["compilation_config"] = "'" + str(engine_args["compilation_config"]) + "'"
+            engine_args["compilation_config"] = '"' + str(engine_args["compilation_config"]) + '"'
 
         subprocess_commands = [
             "vllm", "serve", str(model_name),

From dd8ad929e13235091c0379a03a2f09ac3a5c61a1 Mon Sep 17 00:00:00 2001
From: Daniel Han <danielhanchen@gmail.com>
Date: Wed, 17 Sep 2025 03:07:03 -0700
Subject: [PATCH 139/154] Update synthetic.py

---
 unsloth/dataprep/synthetic.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/unsloth/dataprep/synthetic.py b/unsloth/dataprep/synthetic.py
index 68dd475e59..53d655ce0e 100644
--- a/unsloth/dataprep/synthetic.py
+++ b/unsloth/dataprep/synthetic.py
@@ -97,15 +97,15 @@ def __init__(
                 engine_args["dtype"] = "auto"
         if "device" in engine_args: del engine_args["device"]
         if "model"  in engine_args: del engine_args["model"]
-        if "compilation_config" in engine_args:
-            # Cannot parse in vllm serve
-            engine_args["compilation_config"] = '"' + str(engine_args["compilation_config"]) + '"'
 
         subprocess_commands = [
             "vllm", "serve", str(model_name),
         ]
         for key, value in engine_args.items():
             flag  = key.replace("_", "-")
+            if key == "compilation_config":
+                subprocess_commands += ["--" + '"' + str(value) + '"',]
+                continue
             which = str(value).replace("torch.", "")
             if which == "True":
                 # Ignore --enforce-eager True

From a725b98363e50b7c80649e83975c1f9017f01eed Mon Sep 17 00:00:00 2001
From: Daniel Han <danielhanchen@gmail.com>
Date: Wed, 17 Sep 2025 03:24:07 -0700
Subject: [PATCH 140/154] Update synthetic.py

---
 unsloth/dataprep/synthetic.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/unsloth/dataprep/synthetic.py b/unsloth/dataprep/synthetic.py
index 53d655ce0e..7c421b33bf 100644
--- a/unsloth/dataprep/synthetic.py
+++ b/unsloth/dataprep/synthetic.py
@@ -104,7 +104,7 @@ def __init__(
         for key, value in engine_args.items():
             flag  = key.replace("_", "-")
             if key == "compilation_config":
-                subprocess_commands += ["--" + '"' + str(value) + '"',]
+                subprocess_commands += ["--" + flag, '"' + str(value) + '"',]
                 continue
             which = str(value).replace("torch.", "")
             if which == "True":

From 321f1a33b0e243691b8e297ac0170393d51456ff Mon Sep 17 00:00:00 2001
From: Daniel Han <danielhanchen@gmail.com>
Date: Wed, 17 Sep 2025 03:26:57 -0700
Subject: [PATCH 141/154] Update synthetic.py

---
 unsloth/dataprep/synthetic.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/unsloth/dataprep/synthetic.py b/unsloth/dataprep/synthetic.py
index 7c421b33bf..7e27b8261d 100644
--- a/unsloth/dataprep/synthetic.py
+++ b/unsloth/dataprep/synthetic.py
@@ -104,7 +104,7 @@ def __init__(
         for key, value in engine_args.items():
             flag  = key.replace("_", "-")
             if key == "compilation_config":
-                subprocess_commands += ["--" + flag, '"' + str(value) + '"',]
+                subprocess_commands += ["--" + flag, "'" + str(value) + "'",]
                 continue
             which = str(value).replace("torch.", "")
             if which == "True":

From 357e5019b7341c9b19f62db146950113e4aa58b9 Mon Sep 17 00:00:00 2001
From: Daniel Han <danielhanchen@gmail.com>
Date: Wed, 17 Sep 2025 03:30:11 -0700
Subject: [PATCH 142/154] Update synthetic.py

---
 unsloth/dataprep/synthetic.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/unsloth/dataprep/synthetic.py b/unsloth/dataprep/synthetic.py
index 7e27b8261d..aa5296c58b 100644
--- a/unsloth/dataprep/synthetic.py
+++ b/unsloth/dataprep/synthetic.py
@@ -104,7 +104,8 @@ def __init__(
         for key, value in engine_args.items():
             flag  = key.replace("_", "-")
             if key == "compilation_config":
-                subprocess_commands += ["--" + flag, "'" + str(value) + "'",]
+                quoted_compilation_config = '"' + str(value) + '"'
+                subprocess_commands += ["--" + flag, "'" + quoted_compilation_config[1:-1] + "'",]
                 continue
             which = str(value).replace("torch.", "")
             if which == "True":

From 8a03656b958d023c4e2639ef3cf7d6c0616f4efb Mon Sep 17 00:00:00 2001
From: Daniel Han <danielhanchen@gmail.com>
Date: Wed, 17 Sep 2025 03:32:54 -0700
Subject: [PATCH 143/154] Update synthetic.py

---
 unsloth/dataprep/synthetic.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/unsloth/dataprep/synthetic.py b/unsloth/dataprep/synthetic.py
index aa5296c58b..eb73a5fb84 100644
--- a/unsloth/dataprep/synthetic.py
+++ b/unsloth/dataprep/synthetic.py
@@ -121,6 +121,7 @@ def __init__(
                 subprocess_commands += ["--" + flag, which,]
         pass
         logger.info(subprocess_commands)
+        print(subprocess_commands)
         vllm_process = subprocess.Popen(
             subprocess_commands,
             stdout = subprocess.PIPE,

From d7832d01baaef9a791c509d69c122c61385425f2 Mon Sep 17 00:00:00 2001
From: Daniel Han <danielhanchen@gmail.com>
Date: Wed, 17 Sep 2025 03:42:00 -0700
Subject: [PATCH 144/154] Update synthetic.py

---
 unsloth/dataprep/synthetic.py | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/unsloth/dataprep/synthetic.py b/unsloth/dataprep/synthetic.py
index eb73a5fb84..70f94e5584 100644
--- a/unsloth/dataprep/synthetic.py
+++ b/unsloth/dataprep/synthetic.py
@@ -102,10 +102,9 @@ def __init__(
             "vllm", "serve", str(model_name),
         ]
         for key, value in engine_args.items():
-            flag  = key.replace("_", "-")
+            flag = key.replace("_", "-")
             if key == "compilation_config":
-                quoted_compilation_config = '"' + str(value) + '"'
-                subprocess_commands += ["--" + flag, "'" + quoted_compilation_config[1:-1] + "'",]
+                subprocess_commands += ["--" + flag, str(value),]
                 continue
             which = str(value).replace("torch.", "")
             if which == "True":
@@ -121,7 +120,6 @@ def __init__(
                 subprocess_commands += ["--" + flag, which,]
         pass
         logger.info(subprocess_commands)
-        print(subprocess_commands)
         vllm_process = subprocess.Popen(
             subprocess_commands,
             stdout = subprocess.PIPE,

From 84f54348de880229dd67afbb737ea247839a6afa Mon Sep 17 00:00:00 2001
From: Daniel Han <danielhanchen@gmail.com>
Date: Wed, 17 Sep 2025 04:09:59 -0700
Subject: [PATCH 145/154] Update synthetic.py

---
 unsloth/dataprep/synthetic.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/unsloth/dataprep/synthetic.py b/unsloth/dataprep/synthetic.py
index 70f94e5584..b75918237b 100644
--- a/unsloth/dataprep/synthetic.py
+++ b/unsloth/dataprep/synthetic.py
@@ -104,7 +104,8 @@ def __init__(
         for key, value in engine_args.items():
             flag = key.replace("_", "-")
             if key == "compilation_config":
-                subprocess_commands += ["--" + flag, str(value),]
+                # [TODO] Unsure why subprocess doesn't process json properly
+                subprocess_commands += ["-O3",]
                 continue
             which = str(value).replace("torch.", "")
             if which == "True":

From 17b2e98f3df7735166a6c3f8b4ba2689418bc6e3 Mon Sep 17 00:00:00 2001
From: Daniel Han <danielhanchen@gmail.com>
Date: Wed, 17 Sep 2025 04:16:41 -0700
Subject: [PATCH 146/154] Update synthetic.py

---
 unsloth/dataprep/synthetic.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/unsloth/dataprep/synthetic.py b/unsloth/dataprep/synthetic.py
index b75918237b..9651df23e8 100644
--- a/unsloth/dataprep/synthetic.py
+++ b/unsloth/dataprep/synthetic.py
@@ -105,7 +105,8 @@ def __init__(
             flag = key.replace("_", "-")
             if key == "compilation_config":
                 # [TODO] Unsure why subprocess doesn't process json properly
-                subprocess_commands += ["-O3",]
+                # Also -O3 breaks on T4!
+                # subprocess_commands += ["-O3",]
                 continue
             which = str(value).replace("torch.", "")
             if which == "True":

From 5364138046cdddedc37594ae87f5e51bb0265031 Mon Sep 17 00:00:00 2001
From: Daniel Han <danielhanchen@gmail.com>
Date: Wed, 17 Sep 2025 06:35:44 -0700
Subject: [PATCH 147/154] Update mapper.py

---
 unsloth/models/mapper.py | 10 ++++++++++
 1 file changed, 10 insertions(+)

diff --git a/unsloth/models/mapper.py b/unsloth/models/mapper.py
index be269316fe..eb9119b681 100644
--- a/unsloth/models/mapper.py
+++ b/unsloth/models/mapper.py
@@ -956,6 +956,16 @@
         "google/gemma-3-270m",
         "unsloth/gemma-3-270m-bnb-4bit",
     ),
+    "unsloth/Magistral-Small-2507-unsloth-bnb-4bit" : (
+        "unsloth/Magistral-Small-2507",
+        "mistralai/Magistral-Small-2507",
+        "unsloth/Magistral-Small-2507-bnb-4bit",
+    ),
+    "unsloth/Magistral-Small-2509-unsloth-bnb-4bit" : (
+        "unsloth/Magistral-Small-2509",
+        "mistralai/Magistral-Small-2509",
+        "unsloth/Magistral-Small-2509-bnb-4bit",
+    ),
 }
 
 INT_TO_FLOAT_MAPPER  = {}

From 8dbd0084d4097cf3c5eb03027ecdf5ec5bdacc17 Mon Sep 17 00:00:00 2001
From: Daniel Han <danielhanchen@gmail.com>
Date: Wed, 17 Sep 2025 08:21:10 -0700
Subject: [PATCH 148/154] Versioning

---
 pyproject.toml           | 4 ++--
 unsloth/models/_utils.py | 2 +-
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/pyproject.toml b/pyproject.toml
index 70fc3bdedc..c3915c1cd6 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -37,7 +37,7 @@ triton = [
 ]
 
 huggingface = [
-    "unsloth_zoo>=2025.9.7",
+    "unsloth_zoo>=2025.9.8",
     "packaging",
     "tyro",
     "transformers>=4.51.3,!=4.52.0,!=4.52.1,!=4.52.2,!=4.52.3,!=4.53.0,!=4.54.0,!=4.55.0,!=4.55.1,<=4.55.4",
@@ -453,7 +453,7 @@ colab-ampere-torch220 = [
     "flash-attn>=2.6.3",
 ]
 colab-new = [
-    "unsloth_zoo>=2025.9.7",
+    "unsloth_zoo>=2025.9.8",
     "packaging",
     "tyro",
     "transformers>=4.51.3,!=4.47.0,!=4.52.0,!=4.52.1,!=4.52.2,!=4.52.3,!=4.53.0,!=4.54.0,!=4.55.0,!=4.55.1,<=4.55.4",
diff --git a/unsloth/models/_utils.py b/unsloth/models/_utils.py
index 41adc74650..d2ebc29bf2 100644
--- a/unsloth/models/_utils.py
+++ b/unsloth/models/_utils.py
@@ -12,7 +12,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-__version__ = "2025.9.6"
+__version__ = "2025.9.7"
 
 __all__ = [
     "SUPPORTS_BFLOAT16",

From d7ca79f18ef5b794b3684768708ab7ebb57a4acc Mon Sep 17 00:00:00 2001
From: Daniel Han <danielhanchen@gmail.com>
Date: Wed, 17 Sep 2025 22:01:19 -0700
Subject: [PATCH 149/154] Update loader.py

---
 unsloth/models/loader.py | 39 ++++++++++++++++++++-------------------
 1 file changed, 20 insertions(+), 19 deletions(-)

diff --git a/unsloth/models/loader.py b/unsloth/models/loader.py
index da40fb57d8..e891340221 100644
--- a/unsloth/models/loader.py
+++ b/unsloth/models/loader.py
@@ -83,8 +83,8 @@
 
 global FORCE_FLOAT32
 FORCE_FLOAT32 = [
-    "gemma3,",  # Add comma bc gemma3 will match gemma3n
-    "gemma3n",
+    "gemma3,",
+    "gemma3n,",
     "gpt_oss",
 ]
 
@@ -627,7 +627,7 @@ def from_pretrained(
             is_peft = False
         pass
         model_types = get_transformers_model_type(model_config or peft_config)
-        model_types_all = ",".join(model_types)
+        model_types_all = ",".join(model_types) + ","
 
         # Check versions
         lowered_model_name = model_name.lower()
@@ -642,21 +642,22 @@ def from_pretrained(
         elif "qwen2_5" in model_types_all and transformers_version < Version("4.49.0"):
             raise RuntimeError("Unsloth: Qwen 2.5 only works on transformers >= 4.49.0." + LATEST)
         # Gemma 3
-        elif "gemma3" in model_types_all:
-            if "gemma3n" in model_types_all:
-                if transformers_version < Version("4.53.0"):
-                    raise RuntimeError("Unsloth: Gemma 3N only works on transformers >= 4.53.0" + LATEST)
-                os.environ["UNSLOTH_DISABLE_STATIC_GENERATION"] = "1"
-                os.environ["UNSLOTH_FORCE_CUSTOM_DTYPE"] = \
-                    "float16;torch.float16;torch.float16;"\
-                    "if name.endswith('norm'): "\
-                    "module._pre_set_compute_dtype = torch.float32\n"\
-                    ";"\
-                    "from unsloth_zoo.temporary_patches.gemma3n import patch_Gemma3nConv_Embed_forwards; patch_Gemma3nConv_Embed_forwards()"
-            else:
-                if transformers_version < Version("4.50.0.dev0"):
-                    raise RuntimeError("Unsloth: Gemma 3 only works on transformers >= 4.50.0." + NIGHTLY)
-
+        elif "gemma3," in model_types_all:
+            if transformers_version < Version("4.50.0.dev0"):
+                raise RuntimeError("Unsloth: Gemma 3 only works on transformers >= 4.50.0." + NIGHTLY)
+            # Set norms to float32 since anyways they get upcasted to float32
+            # common in both gemma-3 and gemma-3n
+            os.environ["UNSLOTH_HIGH_PRECISION_LAYERNORM"] = "1"
+        elif "gemma3n," in model_types_all:
+            if transformers_version < Version("4.53.0"):
+                raise RuntimeError("Unsloth: Gemma 3N only works on transformers >= 4.53.0" + LATEST)
+            os.environ["UNSLOTH_DISABLE_STATIC_GENERATION"] = "1"
+            os.environ["UNSLOTH_FORCE_CUSTOM_DTYPE"] = \
+                "float16;torch.float16;torch.float16;"\
+                "if name.endswith('norm'): "\
+                "module._pre_set_compute_dtype = torch.float32\n"\
+                ";"\
+                "from unsloth_zoo.temporary_patches.gemma3n import patch_Gemma3nConv_Embed_forwards; patch_Gemma3nConv_Embed_forwards()"
             # Set norms to float32 since anyways they get upcasted to float32
             # common in both gemma-3 and gemma-3n
             os.environ["UNSLOTH_HIGH_PRECISION_LAYERNORM"] = "1"
@@ -811,7 +812,7 @@ def from_pretrained(
         for disable_name in FORCE_FLOAT32:
             # add comma to model_types_all matching in case of exact match for end
             if (disable_name.lower() == model_type_arch.lower().replace("-", "").replace("_", "") or \
-                disable_name.lower() in f'{model_types_all},') and \
+                disable_name.lower() in model_types_all) and \
                 ((dtype == torch.float16) or not SUPPORTS_BFLOAT16):
                 os.environ["UNSLOTH_FORCE_FLOAT32"] = "1"
                 dtype = torch.bfloat16 # Change to bfloat16 loading

From bb90785ad3066b4ba926cf1e607f120128c32982 Mon Sep 17 00:00:00 2001
From: Daniel Han <danielhanchen@gmail.com>
Date: Wed, 17 Sep 2025 22:10:29 -0700
Subject: [PATCH 150/154] Update loader.py

---
 unsloth/models/loader.py | 39 +++++++++++++++++++--------------------
 1 file changed, 19 insertions(+), 20 deletions(-)

diff --git a/unsloth/models/loader.py b/unsloth/models/loader.py
index e891340221..da40fb57d8 100644
--- a/unsloth/models/loader.py
+++ b/unsloth/models/loader.py
@@ -83,8 +83,8 @@
 
 global FORCE_FLOAT32
 FORCE_FLOAT32 = [
-    "gemma3,",
-    "gemma3n,",
+    "gemma3,",  # Add comma bc gemma3 will match gemma3n
+    "gemma3n",
     "gpt_oss",
 ]
 
@@ -627,7 +627,7 @@ def from_pretrained(
             is_peft = False
         pass
         model_types = get_transformers_model_type(model_config or peft_config)
-        model_types_all = ",".join(model_types) + ","
+        model_types_all = ",".join(model_types)
 
         # Check versions
         lowered_model_name = model_name.lower()
@@ -642,22 +642,21 @@ def from_pretrained(
         elif "qwen2_5" in model_types_all and transformers_version < Version("4.49.0"):
             raise RuntimeError("Unsloth: Qwen 2.5 only works on transformers >= 4.49.0." + LATEST)
         # Gemma 3
-        elif "gemma3," in model_types_all:
-            if transformers_version < Version("4.50.0.dev0"):
-                raise RuntimeError("Unsloth: Gemma 3 only works on transformers >= 4.50.0." + NIGHTLY)
-            # Set norms to float32 since anyways they get upcasted to float32
-            # common in both gemma-3 and gemma-3n
-            os.environ["UNSLOTH_HIGH_PRECISION_LAYERNORM"] = "1"
-        elif "gemma3n," in model_types_all:
-            if transformers_version < Version("4.53.0"):
-                raise RuntimeError("Unsloth: Gemma 3N only works on transformers >= 4.53.0" + LATEST)
-            os.environ["UNSLOTH_DISABLE_STATIC_GENERATION"] = "1"
-            os.environ["UNSLOTH_FORCE_CUSTOM_DTYPE"] = \
-                "float16;torch.float16;torch.float16;"\
-                "if name.endswith('norm'): "\
-                "module._pre_set_compute_dtype = torch.float32\n"\
-                ";"\
-                "from unsloth_zoo.temporary_patches.gemma3n import patch_Gemma3nConv_Embed_forwards; patch_Gemma3nConv_Embed_forwards()"
+        elif "gemma3" in model_types_all:
+            if "gemma3n" in model_types_all:
+                if transformers_version < Version("4.53.0"):
+                    raise RuntimeError("Unsloth: Gemma 3N only works on transformers >= 4.53.0" + LATEST)
+                os.environ["UNSLOTH_DISABLE_STATIC_GENERATION"] = "1"
+                os.environ["UNSLOTH_FORCE_CUSTOM_DTYPE"] = \
+                    "float16;torch.float16;torch.float16;"\
+                    "if name.endswith('norm'): "\
+                    "module._pre_set_compute_dtype = torch.float32\n"\
+                    ";"\
+                    "from unsloth_zoo.temporary_patches.gemma3n import patch_Gemma3nConv_Embed_forwards; patch_Gemma3nConv_Embed_forwards()"
+            else:
+                if transformers_version < Version("4.50.0.dev0"):
+                    raise RuntimeError("Unsloth: Gemma 3 only works on transformers >= 4.50.0." + NIGHTLY)
+
             # Set norms to float32 since anyways they get upcasted to float32
             # common in both gemma-3 and gemma-3n
             os.environ["UNSLOTH_HIGH_PRECISION_LAYERNORM"] = "1"
@@ -812,7 +811,7 @@ def from_pretrained(
         for disable_name in FORCE_FLOAT32:
             # add comma to model_types_all matching in case of exact match for end
             if (disable_name.lower() == model_type_arch.lower().replace("-", "").replace("_", "") or \
-                disable_name.lower() in model_types_all) and \
+                disable_name.lower() in f'{model_types_all},') and \
                 ((dtype == torch.float16) or not SUPPORTS_BFLOAT16):
                 os.environ["UNSLOTH_FORCE_FLOAT32"] = "1"
                 dtype = torch.bfloat16 # Change to bfloat16 loading

From 3289826add711c92dee44f1117fa6a54d6e68b91 Mon Sep 17 00:00:00 2001
From: Daniel Han <danielhanchen@gmail.com>
Date: Thu, 18 Sep 2025 02:22:14 -0700
Subject: [PATCH 151/154] Update rl.py

---
 unsloth/models/rl.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/unsloth/models/rl.py b/unsloth/models/rl.py
index 6f1f000e68..3d5f6d084b 100644
--- a/unsloth/models/rl.py
+++ b/unsloth/models/rl.py
@@ -259,7 +259,8 @@ def _patch_trl_rl_trainers(trainer_file = "grpo_trainer"):
         "use_fp16 = getattr(args, 'fp16', False)\n"\
         "if type(use_fp16) is not bool: use_fp16 = False\n"\
         "force_float32 = False\n"\
-        "if os.environ.get('UNSLOTH_FORCE_FLOAT32', '0') == '1':\n"\
+        "full_finetuning = os.environ.get('UNSLOTH_ENABLE_FULL_FINETUNING', '0') == '1'\n"\
+        "if not full_finetuning and (os.environ.get('UNSLOTH_FORCE_FLOAT32', '0') == '1'):\n"\
         "    print('Unsloth: Switching to float32 training since model cannot work with float16')\n"\
         "    force_float32 = True\n"\
         "mixed_precision_dtype = os.environ.get('UNSLOTH_MIXED_PRECISION', 'float32')\n"\

From a04211436f8a11aaece59d4662a29ab4c825a0b1 Mon Sep 17 00:00:00 2001
From: Daniel Han <danielhanchen@gmail.com>
Date: Thu, 18 Sep 2025 04:31:28 -0700
Subject: [PATCH 152/154] Versioning

---
 pyproject.toml           | 4 ++--
 unsloth/models/_utils.py | 2 +-
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/pyproject.toml b/pyproject.toml
index c3915c1cd6..4f9c308b32 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -37,7 +37,7 @@ triton = [
 ]
 
 huggingface = [
-    "unsloth_zoo>=2025.9.8",
+    "unsloth_zoo>=2025.9.9",
     "packaging",
     "tyro",
     "transformers>=4.51.3,!=4.52.0,!=4.52.1,!=4.52.2,!=4.52.3,!=4.53.0,!=4.54.0,!=4.55.0,!=4.55.1,<=4.55.4",
@@ -453,7 +453,7 @@ colab-ampere-torch220 = [
     "flash-attn>=2.6.3",
 ]
 colab-new = [
-    "unsloth_zoo>=2025.9.8",
+    "unsloth_zoo>=2025.9.9",
     "packaging",
     "tyro",
     "transformers>=4.51.3,!=4.47.0,!=4.52.0,!=4.52.1,!=4.52.2,!=4.52.3,!=4.53.0,!=4.54.0,!=4.55.0,!=4.55.1,<=4.55.4",
diff --git a/unsloth/models/_utils.py b/unsloth/models/_utils.py
index 79134005dc..5f41352d97 100644
--- a/unsloth/models/_utils.py
+++ b/unsloth/models/_utils.py
@@ -12,7 +12,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-__version__ = "2025.9.7"
+__version__ = "2025.9.8"
 
 __all__ = [
     "SUPPORTS_BFLOAT16",

From ffa04dde12b7fa9430566cce8b0309531f7af2ba Mon Sep 17 00:00:00 2001
From: Daniel Han <danielhanchen@gmail.com>
Date: Thu, 18 Sep 2025 15:45:42 -0700
Subject: [PATCH 153/154] Update _utils.py

---
 unsloth/models/_utils.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/unsloth/models/_utils.py b/unsloth/models/_utils.py
index 5f41352d97..79134005dc 100644
--- a/unsloth/models/_utils.py
+++ b/unsloth/models/_utils.py
@@ -12,7 +12,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-__version__ = "2025.9.8"
+__version__ = "2025.9.7"
 
 __all__ = [
     "SUPPORTS_BFLOAT16",

From b3654449bdd237e642e5f44c6e96c74e203232f7 Mon Sep 17 00:00:00 2001
From: Daniel Han <danielhanchen@gmail.com>
Date: Thu, 18 Sep 2025 18:57:41 -0700
Subject: [PATCH 154/154] Fix auto_mapping

---
 unsloth/models/llama.py  | 4 +++-
 unsloth/models/vision.py | 4 +++-
 2 files changed, 6 insertions(+), 2 deletions(-)

diff --git a/unsloth/models/llama.py b/unsloth/models/llama.py
index 7414c07326..6326f519f1 100644
--- a/unsloth/models/llama.py
+++ b/unsloth/models/llama.py
@@ -25,7 +25,7 @@
 from torch.nn.functional import scaled_dot_product_attention
 from transformers import __version__ as transformers_version
 from unsloth_zoo.utils import Version, _get_dtype
-from unsloth_zoo.hf_utils import dtype_from_config, add_dtype_kwargs
+from unsloth_zoo.hf_utils import dtype_from_config, add_dtype_kwargs, fix_lora_auto_mapping
 from unsloth_zoo.peft_utils import SKIP_QUANTIZATION_MODULES
 from unsloth import DEVICE_TYPE, DEVICE_COUNT
 
@@ -2632,6 +2632,8 @@ def get_peft_model(
         pass
 
         model = _get_peft_model(model, lora_config)
+        # Fix the case where LoraConfig.auto_mapping is None
+        fix_lora_auto_mapping(model)
 
         # Apply QAT + LoRA if specified
         if qat_scheme is not None:
diff --git a/unsloth/models/vision.py b/unsloth/models/vision.py
index d6c710c281..d03ffb45a9 100644
--- a/unsloth/models/vision.py
+++ b/unsloth/models/vision.py
@@ -43,7 +43,7 @@
 from transformers import __version__ as transformers_version
 from triton import __version__ as triton_version
 from unsloth_zoo.utils import _get_dtype
-from unsloth_zoo.hf_utils import dtype_from_config, add_dtype_kwargs
+from unsloth_zoo.hf_utils import dtype_from_config, add_dtype_kwargs, fix_lora_auto_mapping
 from unsloth_zoo.patching_utils import patch_model_and_tokenizer
 from unsloth_zoo.training_utils import prepare_model_for_training
 
@@ -758,6 +758,8 @@ def get_peft_model(
             use_gradient_checkpointing = use_gradient_checkpointing,
         )
         model = _get_peft_model(model, lora_config)
+        # Fix the case where LoraConfig.auto_mapping is None
+        fix_lora_auto_mapping(model)
         # Enable gradients on modules which are trainable
         requires_grad_for_gradient_checkpointing(model)
         trust_remote_code = getattr(model, "_unsloth_trust_remote_code", False)