diff --git a/unsloth/models/vision.py b/unsloth/models/vision.py
index 31733c2976..1404be8b0f 100644
--- a/unsloth/models/vision.py
+++ b/unsloth/models/vision.py
@@ -485,7 +485,7 @@ def post_patch_model(
     full_finetuning = os.environ.get("UNSLOTH_ENABLE_FULL_FINETUNING", "0") == "1"
 
     float32_mixed_precision = True
-    if _get_dtype(model.config.torch_dtype) == torch.bfloat16:
+    if _get_dtype(model.config.torch_dtype) == torch.bfloat16 and full_finetuning:
         # Use bfloat16 precision for full finetuning
         float32_mixed_precision = False
 
diff --git a/unsloth/save.py b/unsloth/save.py
index 4b2c012985..3e720ceb9b 100644
--- a/unsloth/save.py
+++ b/unsloth/save.py
@@ -2218,12 +2218,61 @@ def unsloth_convert_lora_to_ggml_and_save_locally(
 
 
 from .models.loader_utils import get_model_name
-from unsloth_zoo.saving_utils import merge_and_overwrite_lora
+from unsloth_zoo.saving_utils import (
+    merge_and_overwrite_lora,
+    prepare_saving,
+)
 from unsloth_zoo.llama_cpp import (
     install_llama_cpp,
-    convert_to_gguf,
+    convert_to_gguf as _convert_to_gguf,
 )
 
+@torch.inference_mode
+def save_to_gguf_generic(
+    model,
+    save_directory,
+    quantization_type = "Q8_0",
+    repo_id = None,
+    token = None,
+):
+    if token is None and repo_id is not None: token = get_token()
+    if repo_id is not None and token is None:
+        raise RuntimeError("Unsloth: Please specify a token for uploading!")
+
+    if not os.path.exists(os.path.join("llama.cpp", "unsloth_convert_hf_to_gguf.py")):
+        install_llama_cpp(just_clone_repo = True)
+    pass
+
+    metadata = _convert_to_gguf(
+        save_directory,
+        print_output = True,
+        quantization_type = quantization_type,
+    )
+    if repo_id is not None:
+        prepare_saving(
+            model,
+            repo_id,
+            push_to_hub = True,
+            max_shard_size = "50GB",
+            private = True,
+            token = token,
+        )
+
+        from huggingface_hub import HfApi
+        api = HfApi(token = token)
+        # NOTE: HfApi.upload_folder does not accept a 'private' kwarg;
+        # the repo is already created as private by prepare_saving above.
+        api.upload_folder(
+            folder_path = save_directory,
+            repo_id = repo_id,
+            repo_type = "model",
+            allow_patterns = ["*.gguf"],
+        )
+    pass
+    return metadata
+pass
+
+
 @torch.inference_mode
 def unsloth_generic_save(
     model,
@@ -2467,8 +2516,8 @@ def patch_saving_functions(model, vision = False):
         # Vision only 1 option
         model.push_to_hub_merged = types.MethodType(unsloth_generic_push_to_hub_merged, model)
         model.save_pretrained_merged = types.MethodType(unsloth_generic_save_pretrained_merged, model)
-        model.push_to_hub_gguf = types.MethodType(not_implemented_save, model)
-        model.save_pretrained_gguf = types.MethodType(not_implemented_save, model)
+        model.push_to_hub_gguf = types.MethodType(save_to_gguf_generic, model)
+        model.save_pretrained_gguf = types.MethodType(save_to_gguf_generic, model)
     pass
     return model
 pass