unslothai · danielhanchen · Feb 9, 2026 · Feb 9, 2026 · Feb 9, 2026 · Feb 9, 2026
@@ -442,10 +442,10 @@ def pre_patch():
         return
 
     @staticmethod
-    def post_patch(model, tokenizer):
+    def post_patch(model, tokenizer, correct_dtype = None):
         # Gemma does not downcast RoPE
         model, tokenizer = patch_model_and_tokenizer(
-            model, tokenizer, downcast_rope = False
+            model, tokenizer, downcast_rope = False, correct_dtype = correct_dtype
         )
 
         # Add 1 to weight

@@ -613,10 +613,10 @@ def pre_patch():
         return
 
     @staticmethod
-    def post_patch(model, tokenizer):
+    def post_patch(model, tokenizer, correct_dtype = None):
         # Gemma does not downcast RoPE
         model, tokenizer = patch_model_and_tokenizer(
-            model, tokenizer, downcast_rope = False
+            model, tokenizer, downcast_rope = False, correct_dtype = correct_dtype
         )
 
         # Add 1 to weight

@@ -542,7 +542,7 @@ def pre_patch():
         return
 
     @staticmethod
-    def post_patch(model, tokenizer):
+    def post_patch(model, tokenizer, correct_dtype = None):
         # Torch.compile fails on embedding matrix??
         # Workaround randomly fixes it for torch versions < 2.2
         model.model.embed_tokens = torch.nn.Embedding.from_pretrained(

@@ -2483,7 +2483,9 @@ def from_pretrained(
         )
 
         model, tokenizer = patch_tokenizer(model, tokenizer)
-        model, tokenizer = model_patcher.post_patch(model, tokenizer)
+        model, tokenizer = model_patcher.post_patch(
+            model, tokenizer, correct_dtype = dtype
+        )
 
         # Patch up QKV / O and MLP
         for idx, layer in enumerate(model.model.layers):
@@ -2666,9 +2668,9 @@ def from_pretrained(
         return model, tokenizer
 
     @staticmethod
-    def post_patch(model, tokenizer):
+    def post_patch(model, tokenizer, correct_dtype = None):
         model, tokenizer = patch_model_and_tokenizer(
-            model, tokenizer, downcast_rope = True
+            model, tokenizer, downcast_rope = True, correct_dtype = correct_dtype
         )
         return model, tokenizer
 

@@ -1244,6 +1244,18 @@ def _patch_trl_rl_trainers(trainer_file = "grpo_trainer"):
                 flags = re.DOTALL,
             )
 
+    # Remove TRL's unconditional bfloat16 cast of trainable params (added in
+    # TRL 0.26.0). TRL hardcodes bfloat16 for QLoRA per the original paper's
+    # recommendation, but this is wrong: it ignores the user's requested dtype
+    # and breaks GradScaler when training with fp16=True. Unsloth already
+    # handles adapter dtype correctly via patch_model_and_tokenizer, so the
+    # entire block is unnecessary. For GRPOTrainer the enclosing peft init
+    # block is already removed above, making this a no-op for GRPO.
+    RLTrainer_source = RLTrainer_source.replace(
+        'if getattr(model, "is_loaded_in_4bit", False) or getattr(model, "is_loaded_in_8bit", False):',
+        "if False:",
+    )
-    RLTrainer_source = RLTrainer_source.replace(
-        "param.data = param.data.to(torch.bfloat16)",
-        "param.data = param.data.to(torch.float32 if getattr(args, 'fp16', False) else torch.bfloat16)",
-    )
+    RLTrainer_source = re.sub(
+        r"param\.data\s*=\s*param\.data\.to\(torch\.bfloat16\)",
+        "param.data = param.data.to(torch.float32 if getattr(args, 'fp16', False) else torch.bfloat16)",
+        RLTrainer_source,
+    )
-    RLTrainer_source = RLTrainer_source.replace(
-        "param.data = param.data.to(torch.bfloat16)",
-        "param.data = param.data.to(torch.float32 if getattr(args, 'fp16', False) else torch.bfloat16)",
-    )
+    RLTrainer_source = re.sub(
+        r"param\.data\s*=\s*param\.data\.to\(torch\.bfloat16\)",
+        "param.data = param.data.to(torch.float32 if getattr(args, 'fp16', False) else torch.bfloat16)",
+        RLTrainer_source,
+    )
+
     if RLTrainer_name == "SFTTrainer":
         original_text = 'self._signature_columns = ["input_ids", "attention_mask", "completion_mask"]'
         new_text = 'self._signature_columns = ["input_ids", "attention_mask", "completion_mask","labels"]'