From d89082ac059111f4cb8445a88084d3f743378f66 Mon Sep 17 00:00:00 2001 From: Erland366 Date: Fri, 14 Mar 2025 22:55:05 +0000 Subject: [PATCH 1/2] Enhance gradient checkpointing and add original model ID retrieval in saving utilities --- unsloth_zoo/peft_utils.py | 3 ++- unsloth_zoo/saving_utils.py | 25 ++++++++++++++++++++++++- 2 files changed, 26 insertions(+), 2 deletions(-) diff --git a/unsloth_zoo/peft_utils.py b/unsloth_zoo/peft_utils.py index babae8671..7db605fad 100644 --- a/unsloth_zoo/peft_utils.py +++ b/unsloth_zoo/peft_utils.py @@ -208,7 +208,8 @@ def requires_grad_pre_hook(module, input): raise RuntimeError("Unsloth: Failed to make input require gradients!") # print(f" WARNING: Empty list input to {module.__class__.__name__}!") # # return - input[0].requires_grad_(True) + if torch.is_floating_point(input[0]): + input[0].requires_grad_(True) else: raise RuntimeError("Unsloth: Failed to make input require gradients!") pass diff --git a/unsloth_zoo/saving_utils.py b/unsloth_zoo/saving_utils.py index abe99634a..d4b2e4809 100644 --- a/unsloth_zoo/saving_utils.py +++ b/unsloth_zoo/saving_utils.py @@ -61,6 +61,7 @@ pass from transformers.modeling_utils import PushToHubMixin import json +import os from pathlib import Path import tempfile from peft import PeftModelForCausalLM @@ -540,7 +541,13 @@ def merge_and_overwrite_lora( model_name = model.config._name_or_path # Find repository's max shard size and total size of everything - file_list = HfFileSystem(token = token).ls(model_name, detail = True) + try: + file_list = HfFileSystem(token = token).ls(model_name, detail = True) + except: + original_model_id = get_original_model_id(model_name) + model_name = original_model_id + file_list = HfFileSystem(token = token).ls(model_name, detail = True) + safetensors_list = [] max_size_in_bytes = 0 total_size_in_bytes = 0 @@ -909,6 +916,22 @@ def merge_lora_weights(state_dict, name): pass pass +def get_original_model_id(local_path: str): + import json + import os + + 
config_path = os.path.join(local_path, "config.json") + if os.path.exists(config_path): + with open(config_path, "r") as f: + config = json.load(f) + + # Check for _name_or_path that's not a local path + # When we load using AutoConfig, the _name_or_path changed into the local path instead + if "_name_or_path" in config and not os.path.exists(config["_name_or_path"]): + return config["_name_or_path"] + + return None + # Unsloth Zoo - Utilities for Unsloth # Copyright 2023-present Daniel Han-Chen, Michael Han-Chen & the Unsloth team. All rights reserved. # From ae6b1e3ecc57acfc33c85f977fceccd4bdc452a9 Mon Sep 17 00:00:00 2001 From: Erland366 Date: Fri, 14 Mar 2025 23:01:19 +0000 Subject: [PATCH 2/2] Also fall back to adapter_config.json when resolving the original model ID --- unsloth_zoo/saving_utils.py | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/unsloth_zoo/saving_utils.py b/unsloth_zoo/saving_utils.py index d4b2e4809..1e72fbb5f 100644 --- a/unsloth_zoo/saving_utils.py +++ b/unsloth_zoo/saving_utils.py @@ -927,8 +927,16 @@ def get_original_model_id(local_path: str): # Check for _name_or_path that's not a local path # When we load using AutoConfig, the _name_or_path changed into the local path instead - if "_name_or_path" in config and not os.path.exists(config["_name_or_path"]): + if "_name_or_path" in config: return config["_name_or_path"] + + config_path = os.path.join(local_path, "adapter_config.json") + if os.path.exists(config_path): + with open(config_path, "r") as f: + config = json.load(f) + + if "base_model_name_or_path" in config: + return config["base_model_name_or_path"] return None