diff --git a/unsloth/models/vision.py b/unsloth/models/vision.py
index 31733c2976..1404be8b0f 100644
--- a/unsloth/models/vision.py
+++ b/unsloth/models/vision.py
@@ -485,7 +485,7 @@ def post_patch_model(
     full_finetuning = os.environ.get("UNSLOTH_ENABLE_FULL_FINETUNING", "0") == "1"
 
     float32_mixed_precision = True
-    if _get_dtype(model.config.torch_dtype) == torch.bfloat16:
+    if _get_dtype(model.config.torch_dtype) == torch.bfloat16 and full_finetuning:
         # Use bfloat16 precision for full finetuning
         float32_mixed_precision = False
 
diff --git a/unsloth/save.py b/unsloth/save.py
index 4b2c012985..3e720ceb9b 100644
--- a/unsloth/save.py
+++ b/unsloth/save.py
@@ -2218,12 +2218,61 @@ def unsloth_convert_lora_to_ggml_and_save_locally(
 
 
 from .models.loader_utils import get_model_name
-from unsloth_zoo.saving_utils import merge_and_overwrite_lora
+from unsloth_zoo.saving_utils import (
+    merge_and_overwrite_lora,
+    prepare_saving,
+)
 from unsloth_zoo.llama_cpp import (
     install_llama_cpp,
-    convert_to_gguf,
+    convert_to_gguf as _convert_to_gguf,
 )
 
+@torch.inference_mode
+def save_to_gguf_generic(
+    model,
+    save_directory,
+    quantization_type = "Q8_0",
+    repo_id = None,
+    token = None,
+):
+    if token is None and repo_id is not None: token = get_token()
+    if repo_id is not None and token is None:
+        raise RuntimeError("Unsloth: Please specify a token for uploading!")
+
+    if not os.path.exists(os.path.join("llama.cpp", "unsloth_convert_hf_to_gguf.py")):
+        install_llama_cpp(just_clone_repo = True)
+    pass
+
+    metadata = _convert_to_gguf(
+        save_directory,
+        print_output = True,
+        quantization_type = quantization_type,
+    )
+    if repo_id is not None:
+        prepare_saving(
+            model,
+            repo_id,
+            push_to_hub = True,
+            max_shard_size = "50GB",
+            private = True,
+            token = token,
+        )
+
+        from huggingface_hub import HfApi
+        api = HfApi(token = token)
+        # NOTE: HfApi.upload_folder does not accept a 'private' kwarg;
+        # the repo is already created as private by prepare_saving above.
+        api.upload_folder(
+            folder_path = save_directory,
+            repo_id = repo_id,
+            repo_type = "model",
+            allow_patterns = ["*.gguf"],
+        )
+    pass
+    return metadata
+pass
+
+
 @torch.inference_mode
 def unsloth_generic_save(
     model,
@@ -2467,8 +2516,8 @@ def patch_saving_functions(model, vision = False):
         # Vision only 1 option
         model.push_to_hub_merged = types.MethodType(unsloth_generic_push_to_hub_merged, model)
         model.save_pretrained_merged = types.MethodType(unsloth_generic_save_pretrained_merged, model)
-        model.push_to_hub_gguf = types.MethodType(not_implemented_save, model)
-        model.save_pretrained_gguf = types.MethodType(not_implemented_save, model)
+        model.push_to_hub_gguf = types.MethodType(save_to_gguf_generic, model)
+        model.save_pretrained_gguf = types.MethodType(save_to_gguf_generic, model)
     pass
     return model
 pass