Skip to content
Merged

Nightly #3737

Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
55 commits
Select commit Hold shift + click to select a range
e53e185
Update _utils.py
danielhanchen Dec 9, 2025
150eadf
Merge branch 'main' into nightly
danielhanchen Dec 9, 2025
25a6250
Merge branch 'main' into nightly
danielhanchen Dec 9, 2025
6b908cf
Merge branch 'main' into nightly
danielhanchen Dec 10, 2025
f754bd2
Merge branch 'main' into nightly
danielhanchen Dec 10, 2025
30ade52
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Dec 10, 2025
94bbcaa
Merge branch 'main' into nightly
danielhanchen Dec 10, 2025
30b78c5
Merge branch 'main' into nightly
danielhanchen Dec 10, 2025
f357fc5
Merge branch 'main' into nightly
danielhanchen Dec 10, 2025
cde2d42
[FIX] [Transformers] VLM input embeds fix for gradients (#3715)
Datta0 Dec 12, 2025
a63a337
Merge branch 'main' into nightly
danielhanchen Dec 12, 2025
2c22ce6
Update rope_embedding.py
danielhanchen Dec 12, 2025
449430d
Merge branch 'main' into nightly
danielhanchen Dec 12, 2025
b5f1a77
Fixes
danielhanchen Dec 12, 2025
c94f595
Update _utils.py
danielhanchen Dec 12, 2025
01319d3
Update import_fixes.py
danielhanchen Dec 12, 2025
696a540
Update rl_replacements.py
danielhanchen Dec 12, 2025
ac54d6e
fix_openenv_no_vllm
danielhanchen Dec 12, 2025
9a98139
Fix
danielhanchen Dec 12, 2025
680f19f
Update __init__.py
danielhanchen Dec 12, 2025
fb763f3
Update __init__.py
danielhanchen Dec 12, 2025
f4f2a7f
Update __init__.py
danielhanchen Dec 12, 2025
e17b62f
Update import_fixes.py
danielhanchen Dec 12, 2025
f34eb0a
Update import_fixes.py
danielhanchen Dec 12, 2025
04ad21c
Update import_fixes.py
danielhanchen Dec 12, 2025
32b52a0
logger
danielhanchen Dec 12, 2025
0c9288f
Update __init__.py
danielhanchen Dec 12, 2025
b5b57b3
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Dec 12, 2025
efb5801
Update __init__.py
danielhanchen Dec 12, 2025
12ccc47
Merge branch 'nightly' of https://github.com/unslothai/unsloth into n…
danielhanchen Dec 12, 2025
e3918e7
Merge branch 'main' into nightly
danielhanchen Dec 12, 2025
0cb9542
Merge branch 'main' into nightly
danielhanchen Dec 17, 2025
5540212
Update import_fixes.py
danielhanchen Dec 17, 2025
4c80b03
Update __init__.py
danielhanchen Dec 17, 2025
e57edc2
Update import_fixes.py
danielhanchen Dec 17, 2025
9bc1567
Update import_fixes.py
danielhanchen Dec 17, 2025
c22fbb7
Update import_fixes.py
danielhanchen Dec 17, 2025
d7482e8
Update import_fixes.py
danielhanchen Dec 17, 2025
ca77798
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Dec 17, 2025
71c5938
Update import_fixes.py
danielhanchen Dec 17, 2025
2991a7d
Merge branch 'nightly' of https://github.com/unslothai/unsloth into n…
danielhanchen Dec 17, 2025
7c10195
Update unsloth/import_fixes.py
danielhanchen Dec 17, 2025
7f744d9
Merge branch 'main' into nightly
danielhanchen Dec 17, 2025
d96ac97
Merge branch 'main' into nightly
danielhanchen Dec 17, 2025
3738db7
Update save.py
danielhanchen Dec 17, 2025
fbaacce
[fbgemm] Silence tma fbgemm (#3735)
Datta0 Dec 17, 2025
6e5a561
Merge branch 'nightly' of https://github.com/unslothai/unsloth into n…
danielhanchen Dec 17, 2025
3e6cfb5
Update loader.py
danielhanchen Dec 17, 2025
30a454c
Update save.py
danielhanchen Dec 17, 2025
b58663a
Update save.py
danielhanchen Dec 17, 2025
7b61375
Update _utils.py
danielhanchen Dec 17, 2025
a2b5def
Update _utils.py
danielhanchen Dec 17, 2025
59a1fa5
Diffusers warnings
danielhanchen Dec 17, 2025
5e33a07
Update pyproject.toml
danielhanchen Dec 17, 2025
1f1bf49
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Dec 17, 2025
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,7 @@ huggingfacenotorch = [
]
huggingface = [
"unsloth[huggingfacenotorch]",
"unsloth_zoo>=2025.12.4",
"unsloth_zoo>=2025.12.5",
"torchvision",
"unsloth[triton]",
]
Expand Down Expand Up @@ -523,7 +523,7 @@ colab-ampere-torch220 = [
"flash-attn>=2.6.3 ; ('linux' in sys_platform)",
]
colab-new = [
"unsloth_zoo>=2025.12.4",
"unsloth_zoo>=2025.12.5",
"packaging",
"tyro",
"transformers>=4.51.3,!=4.52.0,!=4.52.1,!=4.52.2,!=4.52.3,!=4.53.0,!=4.54.0,!=4.55.0,!=4.55.1,!=4.57.0,<=4.57.3",
Expand Down
3 changes: 3 additions & 0 deletions unsloth/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,14 +29,17 @@
fix_message_factory_issue,
check_fbgemm_gpu_version,
torchvision_compatibility_check,
fix_diffusers_warnings,
)

fix_message_factory_issue()
check_fbgemm_gpu_version()
torchvision_compatibility_check()
fix_diffusers_warnings()
del fix_message_factory_issue
del check_fbgemm_gpu_version
del torchvision_compatibility_check
del fix_diffusers_warnings

# This check is critical because Unsloth optimizes these libraries by modifying
# their code at import time. If they're imported first, the original (slower,
Expand Down
35 changes: 35 additions & 0 deletions unsloth/import_fixes.py
Original file line number Diff line number Diff line change
Expand Up @@ -71,6 +71,36 @@ def filter(self, x):
return not (self.text in x.getMessage())


class HidePrintMessage:
    """Stream proxy that silently drops writes containing blocked substrings.

    Wraps an existing stream (e.g. ``sys.stderr``); any ``write`` whose
    message contains a registered substring is swallowed, everything else
    is forwarded. All other attribute access is delegated to the wrapped
    stream so the proxy stays drop-in compatible.
    """

    __slots__ = ("_stream", "_blocked")

    def __init__(self, original_stream):
        # Underlying stream plus the substrings whose messages we suppress.
        self._stream = original_stream
        self._blocked = []

    def add_filter(self, text):
        """Register a substring; writes containing it are discarded."""
        self._blocked.append(text)

    def write(self, message):
        # Forward only if the message matches none of the blocked substrings.
        for text in self._blocked:
            if text in message:
                return
        self._stream.write(message)

    def flush(self):
        self._stream.flush()

    def __getattr__(self, name):
        # Delegate everything else (encoding, isatty, fileno, ...) to the
        # wrapped stream.
        return getattr(self._stream, name)


# Install the stderr filter unless the user explicitly enabled full logging.
if os.environ.get("UNSLOTH_ENABLE_LOGGING", "0") != "1":
    import sys

    # Apply to stderr for FBGEMM
    # NOTE: this replaces sys.stderr process-wide with the filtering proxy;
    # later code sees the proxy, which delegates unknown attributes to the
    # real stream.
    sys.stderr = HidePrintMessage(sys.stderr)
    # FBGEMM prints this benchmark notice directly to stderr (not via logging),
    # so a logging filter cannot catch it:
    # https://github.com/pytorch/FBGEMM/blob/d99cd96490ec4aabac2ee95b1e76ea4dcfcfa628/fbgemm_gpu/experimental/gemm/triton_gemm/utils.py#L43-L52
    sys.stderr.add_filter("TMA benchmarks will be running")


# Fix up AttributeError: 'MessageFactory' object has no attribute 'GetPrototype'
# MUST do this at the start primarily due to tensorflow causing issues
def fix_message_factory_issue():
Expand Down Expand Up @@ -506,3 +536,8 @@ def get_mapped_key(key: str, mapping_dict: dict[str, str]) -> str:
logger.info("Unsloth: Patching Executorch to fix get_mapped_key")
except Exception as e:
logger.info(f"Unsloth: Failed Executorch with error = {str(e)}")


def fix_diffusers_warnings():
    """Silence noisy Diffusers deprecation warnings.

    Diffusers emits "Flax classes are deprecated and will be removed in
    Diffusers v1.0.0." at import time; raising its logging verbosity
    threshold to "error" suppresses that message.

    An explicit user-provided ``DIFFUSERS_VERBOSITY`` is respected rather
    than overwritten, so users who opted into verbose Diffusers logging
    keep their setting.
    """
    # setdefault: only set the variable when the user has not chosen one.
    os.environ.setdefault("DIFFUSERS_VERBOSITY", "error")
12 changes: 1 addition & 11 deletions unsloth/models/_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@
# See the License for the specific language governing permissions and
# limitations under the License.

__version__ = "2025.12.5"
__version__ = "2025.12.6"

__all__ = [
"SUPPORTS_BFLOAT16",
Expand Down Expand Up @@ -413,16 +413,6 @@ def filter(self, x):
except:
pass

# Flax classes are deprecated and will be removed in Diffusers v1.0.0.
try:
from diffusers.utils import logger as diffusers_logger

diffusers_logger.addFilter(HideLoggingMessage("are deprecated"))
del diffusers_logger
except:
pass


# Errors out on
# Some weights of Gemma3nForConditionalGeneration were not initialized from the model checkpoint
from transformers.modeling_utils import logger as transformers_logger
Expand Down
2 changes: 2 additions & 0 deletions unsloth/models/loader.py
Original file line number Diff line number Diff line change
Expand Up @@ -739,6 +739,8 @@ def from_pretrained(
"compatible with `full_finetuning=True`. If you wish to use QAT with LoRA, "
"please pass in `qat_scheme` in `FastLanguageModel.get_peft_model(...)` instead."
)
if qat_scheme == "phone-deployment":
qat_scheme = "int8-int4"
# Check if 4bit is allowed specifically for AMD
if not ALLOW_BITSANDBYTES and not use_exact_model_name:
if load_in_4bit or load_in_8bit or model_name.lower().endswith("-bnb-4bit"):
Expand Down
24 changes: 21 additions & 3 deletions unsloth/save.py
Original file line number Diff line number Diff line change
Expand Up @@ -2745,6 +2745,17 @@ def _unsloth_save_torchao_with_attached_config(
"""Save a QAT-trained model by converting fake-quantized weights to real quantized weights."""
# Convert QAT fake-quantized weights to real quantized weights
_convert_torchao_model(model)
# PEFT models also might come here, so parse it
if isinstance(model, PeftModelForCausalLM):
_unsloth_save_torchao_with_given_config(
model = model,
save_directory = save_directory,
tokenizer = tokenizer,
torchao_config = model.config.quantization_config,
push_to_hub = push_to_hub,
token = token,
)
return

# TorchAO does not support safe_serialization reliably
safe_serialization = False
Expand Down Expand Up @@ -2806,7 +2817,10 @@ def _unsloth_save_torchao_with_given_config(
)
from torchao import quantize_

quantization_config = TorchAoConfig(quant_type = torchao_config)
if isinstance(torchao_config, TorchAoConfig):
quantization_config = torchao_config
else:
quantization_config = TorchAoConfig(quant_type = torchao_config)

# Determine if this is a VLM
is_vlm = False
Expand Down Expand Up @@ -2897,7 +2911,7 @@ def unsloth_save_pretrained_torchao(
)

if torchao_config is not None:
# PTQ path: user provided a config, model must NOT have QAT config
# PTQ path: user provided a config, model must NOT have QAT config
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

medium

The comment unless PEFT seems to contradict the assert statement that follows. The assertion assert not has_qat_config will fail for any model with a QAT config, including PEFT models, if torchao_config is provided. This is consistent with the error message, which advises against passing torchao_config for QAT models.

To avoid confusion, consider updating the comment to align with the code's behavior by removing the 'unless PEFT' clause.

Suggested change
# PTQ path: user provided a config, model must NOT have QAT config unless PEFT
# PTQ path: user provided a config, model must NOT have QAT config

assert not has_qat_config, (
"Unsloth: You passed `torchao_config` but this model was trained with `qat_scheme`. "
"For QAT models, do not pass `torchao_config` - the quantization config is already "
Expand Down Expand Up @@ -3010,7 +3024,11 @@ def patch_saving_functions(model, vision = False):

original_model = model
while True:
if original_model.push_to_hub.__name__ != "unsloth_push_to_hub":
# Check if push_to_hub exists before accessing its __name__
if (
hasattr(original_model, "push_to_hub")
and original_model.push_to_hub.__name__ != "unsloth_push_to_hub"
):
original_model.original_push_to_hub = original_model.push_to_hub
original_model.push_to_hub = types.MethodType(
unsloth_push_to_hub, original_model
Expand Down