From c39f56fce039742693814b7770bde020399251a3 Mon Sep 17 00:00:00 2001 From: Daniel Han Date: Sat, 9 Aug 2025 14:45:43 -0700 Subject: [PATCH 001/154] Fix mamba --- unsloth/models/loader.py | 2 ++ unsloth/models/vision.py | 1 + 2 files changed, 3 insertions(+) diff --git a/unsloth/models/loader.py b/unsloth/models/loader.py index ea746be43d..75561c4775 100644 --- a/unsloth/models/loader.py +++ b/unsloth/models/loader.py @@ -587,6 +587,8 @@ def from_pretrained( if transformers_version < Version("4.53.0"): raise RuntimeError("Unsloth: Gemma 3N only works on transformers >= 4.53.0" + LATEST) elif "falcon-h1" in lowered_model_name: + # Falcon must use float32 Triton ie TRITON_F32_DEFAULT = 'ieee' + # since Mamba kernels error out on using lower precision os.environ["UNSLOTH_FORCE_CUSTOM_DTYPE"] = \ "float16;torch.float32;torch.float16;"\ "if name.endswith(('q_proj', 'k_proj', 'v_proj', 'o_proj', 'gate_proj', 'up_proj', 'down_proj', 'head')): module.to(torch.float16); "\ diff --git a/unsloth/models/vision.py b/unsloth/models/vision.py index 5524d8f16d..bdf86196d4 100644 --- a/unsloth/models/vision.py +++ b/unsloth/models/vision.py @@ -373,6 +373,7 @@ def from_pretrained( custom_datatype = _custom_datatype # Execute code as well if len(execute_code.strip()) != 0: + print(execute_code) exec(execute_code) else: custom_datatype = None From 4bd35c509f26c4ff3409090175bba7fab4a604a9 Mon Sep 17 00:00:00 2001 From: Daniel Han Date: Sat, 9 Aug 2025 14:50:53 -0700 Subject: [PATCH 002/154] Update loader.py --- unsloth/models/loader.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/unsloth/models/loader.py b/unsloth/models/loader.py index 75561c4775..186d302d44 100644 --- a/unsloth/models/loader.py +++ b/unsloth/models/loader.py @@ -591,8 +591,8 @@ def from_pretrained( # since Mamba kernels error out on using lower precision os.environ["UNSLOTH_FORCE_CUSTOM_DTYPE"] = \ "float16;torch.float32;torch.float16;"\ - "if name.endswith(('q_proj', 'k_proj', 'v_proj', 'o_proj', 'gate_proj', 'up_proj', 'down_proj', 'head')): module.to(torch.float16); "\ - "os.environ['TRITON_F32_DEFAULT'] = 'ieee';" + "if name.endswith(('q_proj', 'k_proj', 'v_proj', 'o_proj', 'gate_proj', 'up_proj', 'down_proj', 'head')): "\ + "module, os.environ['TRITON_F32_DEFAULT'] = module.to(torch.float16), 'ieee'" elif "gpt-oss" in lowered_model_name: os.environ["UNSLOTH_DISABLE_STATIC_GENERATION"] = "1" # CCE fails on Tesla T4 From 1f0a4c32aac3ca721fb50cad39a8dbbf28e4fc1b Mon Sep 17 00:00:00 2001 From: Daniel Han Date: Sat, 9 Aug 2025 14:51:04 -0700 Subject: [PATCH 003/154] Update vision.py --- unsloth/models/vision.py | 1 - 1 file changed, 1 deletion(-) diff --git a/unsloth/models/vision.py b/unsloth/models/vision.py index bdf86196d4..5524d8f16d 100644 --- a/unsloth/models/vision.py +++ b/unsloth/models/vision.py @@ -373,7 +373,6 @@ def from_pretrained( custom_datatype = _custom_datatype # Execute code as well if len(execute_code.strip()) != 0: - print(execute_code) exec(execute_code) else: custom_datatype = None From 3cb97197d56f31c040c8bc17f68bb682aacb1928 Mon Sep 17 00:00:00 2001 From: Daniel Han Date: Sat, 9 Aug 2025 14:54:35 -0700 Subject: [PATCH 004/154] Update loader.py --- unsloth/models/loader.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/unsloth/models/loader.py b/unsloth/models/loader.py index 186d302d44..b8f2432fc0 100644 --- a/unsloth/models/loader.py +++ b/unsloth/models/loader.py @@ -591,8 +591,8 @@ def from_pretrained( # since Mamba kernels error out on using lower precision os.environ["UNSLOTH_FORCE_CUSTOM_DTYPE"] = \ "float16;torch.float32;torch.float16;"\ - "if name.endswith(('q_proj', 'k_proj', 'v_proj', 'o_proj', 'gate_proj', 'up_proj', 'down_proj', 'head')): "\ - "module, os.environ['TRITON_F32_DEFAULT'] = module.to(torch.float16), 'ieee'" + "if name.endswith(('q_proj', 'k_proj', 'v_proj', 'o_proj', 'gate_proj', 'up_proj', 'down_proj', 'head')): module.to(torch.float16);"\ + "os.environ['TRITON_F32_DEFAULT'] = 'ieee'" elif "gpt-oss" in lowered_model_name: os.environ["UNSLOTH_DISABLE_STATIC_GENERATION"] = "1" # CCE fails on Tesla T4 From 1432eac9d0b82ab732e4e4f1f9fbb0fbbb4c63df Mon Sep 17 00:00:00 2001 From: Datta Nimmaturi Date: Wed, 13 Aug 2025 08:16:43 +0530 Subject: [PATCH 005/154] Filter vLLM standby logs (#3131) * filter vLLM standby logs * safeguard standby logger patch * Update unsloth/models/_utils.py * Update unsloth/models/_utils.py * Update unsloth/models/_utils.py --------- Co-authored-by: Daniel Han --- unsloth/models/_utils.py | 34 ++++++++++++++++++++++++++++++++++ 1 file changed, 34 insertions(+) diff --git a/unsloth/models/_utils.py b/unsloth/models/_utils.py index 4426a28266..d904d8674a 100644 --- a/unsloth/models/_utils.py +++ b/unsloth/models/_utils.py @@ -152,6 +152,40 @@ def __init__(self, text): self.text = text def filter(self, x): return not (self.text in x.getMessage()) pass +if os.environ.get('UNSLOTH_ENABLE_LOGGING', '0') != '1': + try: + from vllm.worker.worker import logger as vllm_worker_logger + vllm_worker_logger.addFilter(HideLoggingMessage("Sleep mode freed")) + del vllm_worker_logger + except: + pass + try: + from vllm.v1.worker.gpu_worker import logger as vllm_gpu_worker_logger + vllm_gpu_worker_logger.addFilter(HideLoggingMessage("Sleep mode freed")) + del vllm_gpu_worker_logger + except: + pass + try: + from vllm.executor.executor_base import logger as vllm_executor_logger + vllm_executor_logger.addFilter(HideLoggingMessage("to fall asleep")) + vllm_executor_logger.addFilter(HideLoggingMessage("to wake up")) + del vllm_executor_logger + except: + pass + try: + from vllm.core.block.prefix_caching_block import logger as vllm_prefix_caching_logger + vllm_prefix_caching_logger.addFilter(HideLoggingMessage("reset prefix cache")) + del vllm_prefix_caching_logger + except: + pass + try: + from vllm.v1.core.block_pool import logger as vllm_block_pool_logger + vllm_block_pool_logger.addFilter(HideLoggingMessage("reset prefix cache")) + del vllm_block_pool_logger + except: + pass +pass + # The speedups for torchdynamo mostly come with GPU Ampere or higher and which is not detected here. from transformers.training_args import logger as transformers_training_args_logger transformers_training_args_logger.addFilter(HideLoggingMessage("The speedups")) From fd1124ab64c96af40dbdf8294a9e2bdaa55e01cf Mon Sep 17 00:00:00 2001 From: Daniel Han Date: Tue, 12 Aug 2025 21:26:39 -0700 Subject: [PATCH 006/154] Update loader.py --- unsloth/models/loader.py | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/unsloth/models/loader.py b/unsloth/models/loader.py index b8f2432fc0..15f3e43aef 100644 --- a/unsloth/models/loader.py +++ b/unsloth/models/loader.py @@ -111,6 +111,14 @@ def from_pretrained( disable_log_stats = True, *args, **kwargs, ): + # Login to allow private models + if token is None: token = get_token() + if token is not None: + try: + from huggingface_hub import login + login(token = token) + except: + pass if load_in_8bit or full_finetuning: return FastModel.from_pretrained( model_name = model_name, @@ -513,6 +521,13 @@ def from_pretrained( *args, **kwargs, ): if token is None: token = get_token() + # Login to allow private models + if token is not None: + try: + from huggingface_hub import login + login(token = token) + except: + pass if whisper_language is not None: assert(type(whisper_language) is str) if whisper_task is not None: assert(type(whisper_task) is str) SUPPORTS_BFLOAT16 = is_bfloat16_supported() From b78189b2d5a127b43a10f5aed1359a1cfe3629c5 Mon Sep 17 00:00:00 2001 From: Daniel Han Date: Wed, 13 Aug 2025 03:27:54 -0700 Subject: [PATCH 007/154] Add scaler --- unsloth/models/_utils.py | 12 ++++++++++++ unsloth/models/rl.py | 14 ++++++++++++++ 2 files changed, 26 insertions(+) diff --git a/unsloth/models/_utils.py b/unsloth/models/_utils.py index d904d8674a..3bd3c2c294 100644 --- a/unsloth/models/_utils.py +++ b/unsloth/models/_utils.py @@ -152,6 +152,7 @@ def __init__(self, text): self.text = text def filter(self, x): return not (self.text in x.getMessage()) pass +# Stop vLLM messages if os.environ.get('UNSLOTH_ENABLE_LOGGING', '0') != '1': try: from vllm.worker.worker import logger as vllm_worker_logger @@ -258,6 +259,17 @@ def filter(self, x): return not (self.text in x.getMessage()) except: pass +# You passed `quantization_config` or equivalent parameters +try: + warnings.filterwarnings( + action = "ignore", + message = r".*quantization_config.*", + category = UserWarning, + append = True, + ) +except: + pass + # Errors out on # Some weights of Gemma3nForConditionalGeneration were not initialized from the model checkpoint from transformers.modeling_utils import logger as transformers_logger diff --git a/unsloth/models/rl.py b/unsloth/models/rl.py index deb779588c..e751ef5e30 100644 --- a/unsloth/models/rl.py +++ b/unsloth/models/rl.py @@ -421,6 +421,20 @@ def _patch_trl_rl_trainers(trainer_file = "grpo_trainer"): RLTrainer_post += neftune_check pass + # Add accelerator scaler to model + if "model" in call_args: + neftune_check = \ + "if hasattr(self, 'accelerator'):\n"\ + " scaler = self.accelerator.scaler\n"\ + " current_model = model\n"\ + " while hasattr(current_model, 'model'):\n"\ + " current_model.accelerator_scaler = scaler\n"\ + " current_model = current_model.model\n"\ + " current_model.accelerator_scaler = scaler\n"\ + "pass\n" + RLTrainer_post += neftune_check + pass + # Edit optional metrics other_metrics_processor = "" if trainer_file in RL_METRICS_CHANGES: From cd2e284c97bb60618da78fcf1314f3a3a5885dd8 Mon Sep 17 00:00:00 2001 From: Daniel Han Date: Wed, 13 Aug 2025 05:12:35 -0700 Subject: [PATCH 008/154] Update llama.py --- unsloth/models/llama.py | 25 +++++++++++++++++++------ 1 file changed, 19 insertions(+), 6 deletions(-) diff --git a/unsloth/models/llama.py b/unsloth/models/llama.py index 3c0d5012ae..eafbd5a433 100644 --- a/unsloth/models/llama.py +++ b/unsloth/models/llama.py @@ -1197,12 +1197,25 @@ def _CausalLM_fast_forward( if self.config.model_type == "falcon_h1": hidden_states = hidden_states * self.config.lm_head_multiplier - loss = fused_linear_cross_entropy( - hidden_states = hidden_states, - lm_weight = lm_head, - labels = labels, - num_items_in_batch = n_items, - logit_softcapping = logit_softcapping, + # loss = fused_linear_cross_entropy( + # hidden_states = hidden_states, + # lm_weight = lm_head, + # labels = labels, + # num_items_in_batch = n_items, + # logit_softcapping = logit_softcapping, + # ) + loss = unsloth_fused_ce_loss( + trainer = None, + hidden_states = hidden_states, + lm_head_weight = lm_head, + lm_head_bias = None, + labels = labels, + mask = None, + n_items = n_items, + scaling = getattr(self, "accelerator_scaler", None), + target_gb = 1, + torch_compile = True, + logit_softcapping = logit_softcapping, ) if not return_dict: output = (logits,) + outputs[1:] From 5e976a5881296f35c6affae56178d3a2abc1fb50 Mon Sep 17 00:00:00 2001 From: Daniel Han Date: Wed, 13 Aug 2025 05:18:55 -0700 Subject: [PATCH 009/154] Update _utils.py --- unsloth/models/_utils.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/unsloth/models/_utils.py b/unsloth/models/_utils.py index 3bd3c2c294..d6eb82f01c 100644 --- a/unsloth/models/_utils.py +++ b/unsloth/models/_utils.py @@ -58,6 +58,7 @@ "HAS_CUT_CROSS_ENTROPY", "EMPTY_LOGITS", "fused_linear_cross_entropy", + "unsloth_fused_ce_loss", "patch_unsloth_smart_gradient_checkpointing", "unpatch_unsloth_smart_gradient_checkpointing", @@ -109,6 +110,7 @@ HAS_CUT_CROSS_ENTROPY, fused_linear_cross_entropy, _unsloth_get_batch_samples, + unsloth_fused_ce_loss, ) from unsloth_zoo.vision_utils import ( process_vision_info, From f451adff6be85230da2cd50bf068f23726d9b99d Mon Sep 17 00:00:00 2001 From: Daniel Han Date: Wed, 13 Aug 2025 06:04:40 -0700 Subject: [PATCH 010/154] Versioning --- pyproject.toml | 6 +++--- unsloth/models/_utils.py | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 8e18688ddf..e563ba6fc5 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -7,7 +7,7 @@ name = "unsloth" dynamic = ["version"] description = "2-5X faster LLM finetuning" readme = "README.md" -requires-python = ">=3.9,<3.13" +requires-python = ">=3.9,<=3.13" license = {text = "Apache-2.0"} keywords = ["ai", "llm",] authors = [ @@ -37,7 +37,7 @@ triton = [ ] huggingface = [ - "unsloth_zoo>=2025.8.3", + "unsloth_zoo>=2025.8.4", "packaging", "tyro", "transformers>=4.51.3,!=4.47.0,!=4.52.0,!=4.52.1,!=4.52.2,!=4.52.3,!=4.53.0", @@ -384,7 +384,7 @@ colab-ampere-torch220 = [ "flash-attn>=2.6.3", ] colab-new = [ - "unsloth_zoo>=2025.8.3", + "unsloth_zoo>=2025.8.4", "packaging", "tyro", "transformers>=4.51.3,!=4.47.0,!=4.52.0,!=4.52.1,!=4.52.2,!=4.52.3,!=4.53.0", diff --git a/unsloth/models/_utils.py b/unsloth/models/_utils.py index d6eb82f01c..d1df57ad5c 100644 --- a/unsloth/models/_utils.py +++ b/unsloth/models/_utils.py @@ -12,7 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. -__version__ = "2025.8.4" +__version__ = "2025.8.5" __all__ = [ "SUPPORTS_BFLOAT16", From 3b82c4259cd7506b351bf9b073a3033be22da8aa Mon Sep 17 00:00:00 2001 From: Daniel Han Date: Thu, 14 Aug 2025 03:31:47 -0700 Subject: [PATCH 011/154] GPT OSS fix --- unsloth/models/loader.py | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/unsloth/models/loader.py b/unsloth/models/loader.py index 7ac27158a2..960f9cc23f 100644 --- a/unsloth/models/loader.py +++ b/unsloth/models/loader.py @@ -615,12 +615,18 @@ def from_pretrained( os.environ["UNSLOTH_ENABLE_CCE"] = "0" if not load_in_4bit: # Only upcast MoE biases for MXFP4, not BnB + # Also set down projection compute dtype to be float32 os.environ["UNSLOTH_FORCE_CUSTOM_DTYPE"] = \ "all;None;None;"\ "x = 'gate_up_proj_bias'\n"\ - "if hasattr(module, x): setattr(module, x, torch.nn.Parameter(getattr(module, x).to(torch.float32)) if isinstance(getattr(module, x), torch.nn.Parameter) else getattr(module, x).to(torch.float32))\n"\ + "if hasattr(module, x): "\ + "setattr(module, x, torch.nn.Parameter(getattr(module, x).to(torch.float32)) if isinstance(getattr(module, x), torch.nn.Parameter) else getattr(module, x).to(torch.float32))\n"\ "x = 'down_proj_bias'\n"\ - "if hasattr(module, x): setattr(module, x, torch.nn.Parameter(getattr(module, x).to(torch.float32)) if isinstance(getattr(module, x), torch.nn.Parameter) else getattr(module, x).to(torch.float32))\n;" + "if hasattr(module, x): "\ + "setattr(module, x, torch.nn.Parameter(getattr(module, x).to(torch.float32)) if isinstance(getattr(module, x), torch.nn.Parameter) else getattr(module, x).to(torch.float32))\n"\ + ""\ + "if 'down_projs' in name and hasattr(module, 'compute_dtype'): module.compute_dtype = torch.float32\n"\ + ";" else: for check_model_name in DISABLE_COMPILE_MODEL_NAMES: if check_model_name in lowered_model_name: From 61366efc914563179c460c16e2e8e144fd4cb4d8 Mon Sep 17 00:00:00 2001 From: Daniel Han Date: Thu, 14 Aug 2025 03:50:52 -0700 Subject: [PATCH 012/154] GPT OSS fix --- unsloth/models/_utils.py | 2 ++ unsloth/models/loader.py | 4 +++- 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/unsloth/models/_utils.py b/unsloth/models/_utils.py index d1df57ad5c..ab2694fde1 100644 --- a/unsloth/models/_utils.py +++ b/unsloth/models/_utils.py @@ -68,6 +68,7 @@ "patch_fast_lora", "validate_loftq_config", "RaiseUninitialized", + "dequantize_module_weight", ] import torch @@ -724,6 +725,7 @@ def prepare_model_for_kbit_training( # Weirdly LoraLayer.update_layer downcasts PEFT layers to float16?? # For mixed precision, we need it to be in float32 not float16. from peft import __version__ as peft_version +from peft.utils.integrations import dequantize_module_weight if Version(peft_version) < Version("0.12.0"): from peft.tuners.lora.layer import LoraLayer try: diff --git a/unsloth/models/loader.py b/unsloth/models/loader.py index 960f9cc23f..bb102376d4 100644 --- a/unsloth/models/loader.py +++ b/unsloth/models/loader.py @@ -625,7 +625,9 @@ def from_pretrained( "if hasattr(module, x): "\ "setattr(module, x, torch.nn.Parameter(getattr(module, x).to(torch.float32)) if isinstance(getattr(module, x), torch.nn.Parameter) else getattr(module, x).to(torch.float32))\n"\ ""\ - "if 'down_projs' in name and hasattr(module, 'compute_dtype'): module.compute_dtype = torch.float32\n"\ + "if 'down_projs' in name and hasattr(module, 'compute_dtype') and "\ + "torch.amax(dequantize_module_weight(module)) >= 1024:"\ + "module.compute_dtype = torch.float32\n"\ ";" else: for check_model_name in DISABLE_COMPILE_MODEL_NAMES: From de043d95684df41bf69ec8ea3c29538a9bcab1e4 Mon Sep 17 00:00:00 2001 From: Daniel Han Date: Thu, 14 Aug 2025 04:28:57 -0700 Subject: [PATCH 013/154] Update loader.py --- unsloth/models/loader.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/unsloth/models/loader.py b/unsloth/models/loader.py index bb102376d4..c61aab750d 100644 --- a/unsloth/models/loader.py +++ b/unsloth/models/loader.py @@ -627,7 +627,7 @@ def from_pretrained( ""\ "if 'down_projs' in name and hasattr(module, 'compute_dtype') and "\ "torch.amax(dequantize_module_weight(module)) >= 1024:"\ - "module.compute_dtype = torch.float32\n"\ + "module._pre_set_compute_dtype = torch.float32\n"\ ";" else: for check_model_name in DISABLE_COMPILE_MODEL_NAMES: From c1ef6f1a6270e24b47259856e4b229f44cbe4053 Mon Sep 17 00:00:00 2001 From: Daniel Han Date: Thu, 14 Aug 2025 04:36:16 -0700 Subject: [PATCH 014/154] Update vision.py --- unsloth/models/vision.py | 1 + 1 file changed, 1 insertion(+) diff --git a/unsloth/models/vision.py b/unsloth/models/vision.py index 5524d8f16d..0f267104f3 100644 --- a/unsloth/models/vision.py +++ b/unsloth/models/vision.py @@ -458,6 +458,7 @@ def from_pretrained( # Edit data-types if custom_datatype is not None: for jj, (name, module) in enumerate(model.named_modules()): + print(custom_datatype) exec(custom_datatype) pass pass From f18cd268bae43f9c531bc78a0ded608339b9b056 Mon Sep 17 00:00:00 2001 From: Daniel Han Date: Thu, 14 Aug 2025 04:41:27 -0700 Subject: [PATCH 015/154] Update vision.py --- unsloth/models/vision.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/unsloth/models/vision.py b/unsloth/models/vision.py index 0f267104f3..fcba556e7a 100644 --- a/unsloth/models/vision.py +++ b/unsloth/models/vision.py @@ -356,6 +356,7 @@ def from_pretrained( correct_dtype = None if os.environ.get("UNSLOTH_FORCE_CUSTOM_DTYPE", "") != "": custom_datatype = os.environ["UNSLOTH_FORCE_CUSTOM_DTYPE"] + print(custom_datatype) assert custom_datatype.count(";") >= 4 checker, _dtype, _bnb_compute_dtype, _custom_datatype, execute_code = custom_datatype.split(";", 4) @@ -371,6 +372,7 @@ def from_pretrained( bnb_compute_dtype = eval(_bnb_compute_dtype) correct_dtype = bnb_compute_dtype custom_datatype = _custom_datatype + print(custom_datatype) # Execute code as well if len(execute_code.strip()) != 0: exec(execute_code) @@ -458,7 +460,6 @@ def from_pretrained( # Edit data-types if custom_datatype is not None: for jj, (name, module) in enumerate(model.named_modules()): - print(custom_datatype) exec(custom_datatype) pass pass From 02152243313ae76b42e4b887d7d5c1c87b0901a6 Mon Sep 17 00:00:00 2001 From: Daniel Han Date: Thu, 14 Aug 2025 04:44:56 -0700 Subject: [PATCH 016/154] Update loader.py --- unsloth/models/loader.py | 9 +++++---- unsloth/models/vision.py | 2 -- 2 files changed, 5 insertions(+), 6 deletions(-) diff --git a/unsloth/models/loader.py b/unsloth/models/loader.py index c61aab750d..d0b7d4dc4c 100644 --- a/unsloth/models/loader.py +++ b/unsloth/models/loader.py @@ -615,16 +615,17 @@ def from_pretrained( os.environ["UNSLOTH_ENABLE_CCE"] = "0" if not load_in_4bit: # Only upcast MoE biases for MXFP4, not BnB - # Also set down projection compute dtype to be float32 os.environ["UNSLOTH_FORCE_CUSTOM_DTYPE"] = \ "all;None;None;"\ "x = 'gate_up_proj_bias'\n"\ "if hasattr(module, x): "\ "setattr(module, x, torch.nn.Parameter(getattr(module, x).to(torch.float32)) if isinstance(getattr(module, x), torch.nn.Parameter) else getattr(module, x).to(torch.float32))\n"\ "x = 'down_proj_bias'\n"\ - "if hasattr(module, x): "\ - "setattr(module, x, torch.nn.Parameter(getattr(module, x).to(torch.float32)) if isinstance(getattr(module, x), torch.nn.Parameter) else getattr(module, x).to(torch.float32))\n"\ - ""\ + ";" + else: + # Set down projection compute dtype to be float32 for float16 machines + os.environ["UNSLOTH_FORCE_CUSTOM_DTYPE"] = \ + "all;None;None;"\ "if 'down_projs' in name and hasattr(module, 'compute_dtype') and "\ "torch.amax(dequantize_module_weight(module)) >= 1024:"\ "module._pre_set_compute_dtype = torch.float32\n"\ diff --git a/unsloth/models/vision.py b/unsloth/models/vision.py index fcba556e7a..5524d8f16d 100644 --- a/unsloth/models/vision.py +++ b/unsloth/models/vision.py @@ -356,7 +356,6 @@ def from_pretrained( correct_dtype = None if os.environ.get("UNSLOTH_FORCE_CUSTOM_DTYPE", "") != "": custom_datatype = os.environ["UNSLOTH_FORCE_CUSTOM_DTYPE"] - print(custom_datatype) assert custom_datatype.count(";") >= 4 checker, _dtype, _bnb_compute_dtype, _custom_datatype, execute_code = custom_datatype.split(";", 4) @@ -372,7 +371,6 @@ def from_pretrained( bnb_compute_dtype = eval(_bnb_compute_dtype) correct_dtype = bnb_compute_dtype custom_datatype = _custom_datatype - print(custom_datatype) # Execute code as well if len(execute_code.strip()) != 0: exec(execute_code) From 5ed4a46e7c37e81e9db29f205ad811b061c330c1 Mon Sep 17 00:00:00 2001 From: Daniel Han Date: Thu, 14 Aug 2025 17:23:46 -0700 Subject: [PATCH 017/154] Update vision.py --- unsloth/models/vision.py | 1 + 1 file changed, 1 insertion(+) diff --git a/unsloth/models/vision.py b/unsloth/models/vision.py index 5524d8f16d..bfd0011f89 100644 --- a/unsloth/models/vision.py +++ b/unsloth/models/vision.py @@ -386,6 +386,7 @@ def from_pretrained( print(f"Unsloth: {model_type_arch.title()} does not support SDPA - switching to eager!") del kwargs["attn_implementation"] pass + print(supports_sdpa, kwargs) bnb_config = None if full_finetuning and (load_in_4bit or load_in_8bit): From a22255811467e34ddac87e9af9879e141bb35673 Mon Sep 17 00:00:00 2001 From: Daniel Han Date: Thu, 14 Aug 2025 19:22:16 -0700 Subject: [PATCH 018/154] Update vision.py --- unsloth/models/vision.py | 1 - 1 file changed, 1 deletion(-) diff --git a/unsloth/models/vision.py b/unsloth/models/vision.py index bfd0011f89..5524d8f16d 100644 --- a/unsloth/models/vision.py +++ b/unsloth/models/vision.py @@ -386,7 +386,6 @@ def from_pretrained( print(f"Unsloth: {model_type_arch.title()} does not support SDPA - switching to eager!") del kwargs["attn_implementation"] pass - print(supports_sdpa, kwargs) bnb_config = None if full_finetuning and (load_in_4bit or load_in_8bit): From 6cffb1cb06a7b2b5d14a3d36acc5970f1bd790a5 Mon Sep 17 00:00:00 2001 From: Daniel Han Date: Fri, 15 Aug 2025 04:25:15 -0700 Subject: [PATCH 019/154] Update llama.py --- unsloth/models/llama.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/unsloth/models/llama.py b/unsloth/models/llama.py index ab7f4bfdde..ae03a685eb 100644 --- a/unsloth/models/llama.py +++ b/unsloth/models/llama.py @@ -701,8 +701,9 @@ def LlamaModel_fast_forward( # Fix out of bounds tokenization if hasattr(self, "max_seq_length"): if seq_length > self.max_seq_length: + shape = input_ids.shape if input_ids is not None else inputs_embeds.shape logger.warning_once( - f"Unsloth: Input IDs of length {seq_length} > the model's max sequence length of {self.max_seq_length}.\n"\ + f"Unsloth: Input IDs of shape {shape} with length {seq_length} > the model's max sequence length of {self.max_seq_length}.\n"\ "We shall truncate it ourselves. It's imperative if you correct this issue first." ) if input_ids is not None: From 15d33a5f0a3fed1e8fbd89acf25dda33ceefc436 Mon Sep 17 00:00:00 2001 From: Daniel Han Date: Fri, 15 Aug 2025 04:34:50 -0700 Subject: [PATCH 020/154] Update llama.py --- unsloth/models/llama.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/unsloth/models/llama.py b/unsloth/models/llama.py index ae03a685eb..badcd51a12 100644 --- a/unsloth/models/llama.py +++ b/unsloth/models/llama.py @@ -698,6 +698,9 @@ def LlamaModel_fast_forward( seq_length_with_past = seq_length + shape = input_ids.shape if input_ids is not None else inputs_embeds.shape + print(shape) + # Fix out of bounds tokenization if hasattr(self, "max_seq_length"): if seq_length > self.max_seq_length: From 95a4dafadb9c1a3b65b4b0c0643741a4b6e144eb Mon Sep 17 00:00:00 2001 From: Daniel Han Date: Fri, 15 Aug 2025 04:54:45 -0700 Subject: [PATCH 021/154] Update llama.py --- unsloth/models/llama.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/unsloth/models/llama.py b/unsloth/models/llama.py index badcd51a12..ae03a685eb 100644 --- a/unsloth/models/llama.py +++ b/unsloth/models/llama.py @@ -698,9 +698,6 @@ def LlamaModel_fast_forward( seq_length_with_past = seq_length - shape = input_ids.shape if input_ids is not None else inputs_embeds.shape - print(shape) - # Fix out of bounds tokenization if hasattr(self, "max_seq_length"): if seq_length > self.max_seq_length: From 4104bba896a760833061ece7dbbdff7423b5d141 Mon Sep 17 00:00:00 2001 From: Daniel Han Date: Fri, 15 Aug 2025 04:55:34 -0700 Subject: [PATCH 022/154] Versioning --- pyproject.toml | 4 ++-- unsloth/models/_utils.py | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index e563ba6fc5..6f6f225bde 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -37,7 +37,7 @@ triton = [ ] huggingface = [ - "unsloth_zoo>=2025.8.4", + "unsloth_zoo>=2025.8.5", "packaging", "tyro", "transformers>=4.51.3,!=4.47.0,!=4.52.0,!=4.52.1,!=4.52.2,!=4.52.3,!=4.53.0", @@ -384,7 +384,7 @@ colab-ampere-torch220 = [ "flash-attn>=2.6.3", ] colab-new = [ - "unsloth_zoo>=2025.8.4", + "unsloth_zoo>=2025.8.5", "packaging", "tyro", "transformers>=4.51.3,!=4.47.0,!=4.52.0,!=4.52.1,!=4.52.2,!=4.52.3,!=4.53.0", diff --git a/unsloth/models/_utils.py b/unsloth/models/_utils.py index ab2694fde1..c84fd118e7 100644 --- a/unsloth/models/_utils.py +++ b/unsloth/models/_utils.py @@ -12,7 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. -__version__ = "2025.8.5" +__version__ = "2025.8.6" __all__ = [ "SUPPORTS_BFLOAT16", From 8cc1999edaee313354f76c2c232389ad3bf07f23 Mon Sep 17 00:00:00 2001 From: Daniel Han Date: Fri, 15 Aug 2025 05:03:06 -0700 Subject: [PATCH 023/154] Update mapper.py --- unsloth/models/mapper.py | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/unsloth/models/mapper.py b/unsloth/models/mapper.py index 829fe29583..e8fc55c2bd 100644 --- a/unsloth/models/mapper.py +++ b/unsloth/models/mapper.py @@ -941,6 +941,16 @@ "Qwen/Qwen3-4B-Thinking-2507", "unsloth/Qwen3-4B-Thinking-2507-bnb-4bit", ), + "unsloth/gemma-3-270m-it-unsloth-bnb-4bit" : ( + "unsloth/gemma-3-270m-it", + "google/gemma-3-270m-it", + "unsloth/gemma-3-270m-it-bnb-4bit", + ), + "unsloth/gemma-3-270m-unsloth-bnb-4bit" : ( + "unsloth/gemma-3-270m", + "google/gemma-3-270m", + "unsloth/gemma-3-270m-bnb-4bit", + ), } INT_TO_FLOAT_MAPPER = {} From ffda8a743c54fb648e8fef8039dfbd724d2fdce2 Mon Sep 17 00:00:00 2001 From: Daniel Han Date: Fri, 15 Aug 2025 18:39:46 -0700 Subject: [PATCH 024/154] Update vision.py --- unsloth/models/vision.py | 1 + 1 file changed, 1 insertion(+) diff --git a/unsloth/models/vision.py b/unsloth/models/vision.py index a5de457cef..a629021339 100644 --- a/unsloth/models/vision.py +++ b/unsloth/models/vision.py @@ -451,6 +451,7 @@ def from_pretrained( # attn_implementation = attn_implementation, **kwargs, ) + print(model.model.layers[0].input_layernorm.weight, model.model.layers[0].input_layernorm.weight.dtype) raise_handler.remove() # Return old flag os.environ["HF_HUB_ENABLE_HF_TRANSFER"] = old_hf_transfer From cdf2e17aea327a652b034a9a2601fee0ae780fb5 Mon Sep 17 00:00:00 2001 From: Daniel Han Date: Fri, 15 Aug 2025 18:49:30 -0700 Subject: [PATCH 025/154] Update vision.py --- unsloth/models/vision.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/unsloth/models/vision.py b/unsloth/models/vision.py index a629021339..fa3bb25e12 100644 --- a/unsloth/models/vision.py +++ b/unsloth/models/vision.py @@ -470,6 +470,7 @@ def from_pretrained( if DEVICE_TYPE == "cuda": torch.cuda.empty_cache() elif DEVICE_TYPE == "xpu": torch.xpu.empty_cache() pass + print(model.model.layers[0].input_layernorm.weight, model.model.layers[0].input_layernorm.weight.dtype) # Counteract saved tokenizers tokenizer_name = model_name if tokenizer_name is None else tokenizer_name @@ -516,6 +517,7 @@ def from_pretrained( ) model, tokenizer = patch_tokenizer(model, tokenizer) model = post_patch_loss_function(model) + print(model.model.layers[0].input_layernorm.weight, model.model.layers[0].input_layernorm.weight.dtype) # Log Unsloth version for future fastpaths for inference if hasattr(model, "config"): From 941d1aeb8f6fb724ca2ca2bc6793980e0647931c Mon Sep 17 00:00:00 2001 From: Daniel Han Date: Fri, 15 Aug 2025 18:52:00 -0700 Subject: [PATCH 026/154] Update vision.py --- unsloth/models/vision.py | 1 + 1 file changed, 1 insertion(+) diff --git a/unsloth/models/vision.py b/unsloth/models/vision.py index fa3bb25e12..4dc9cc4639 100644 --- a/unsloth/models/vision.py +++ b/unsloth/models/vision.py @@ -507,6 +507,7 @@ def from_pretrained( tokenizer.pad_token_id = __tokenizer.pad_token_id pass # Fix other stuff like BnB compute data types + print("do_forced_float32", do_forced_float32) model, tokenizer = patch_model_and_tokenizer( model, tokenizer, From 73fa72cb69866bec70cad78855fef994eb95b916 Mon Sep 17 00:00:00 2001 From: Daniel Han Date: Fri, 15 Aug 2025 19:13:27 -0700 Subject: [PATCH 027/154] Upcast norms --- unsloth/models/loader.py | 33 +++++++++++++++++++++++++++------ 1 file changed, 27 insertions(+), 6 deletions(-) diff --git a/unsloth/models/loader.py b/unsloth/models/loader.py index 59226f0f42..edd909abfe 100644 --- a/unsloth/models/loader.py +++ b/unsloth/models/loader.py @@ -571,8 +571,15 @@ def from_pretrained( elif "qwen2.5" in lowered_model_name and transformers_version < Version("4.49.0"): raise RuntimeError("Unsloth: Qwen 2.5 only works on transformers >= 4.49.0." + LATEST) # Gemma 3 - elif "gemma-3" in lowered_model_name and transformers_version < Version("4.50.0.dev0"): - raise RuntimeError("Unsloth: Gemma 3 only works on transformers >= 4.50.0." + NIGHTLY) + elif "gemma-3" in lowered_model_name: + if transformers_version < Version("4.50.0.dev0"): + raise RuntimeError("Unsloth: Gemma 3 only works on transformers >= 4.50.0." + NIGHTLY) + # Set norms to float32 since anyways they get upcasted to float32 + os.environ["UNSLOTH_FORCE_CUSTOM_DTYPE"] = \ + "all;None;None;"\ + "if name.endswith('norm'): "\ + "module._pre_set_compute_dtype = torch.float32\n"\ + ";" # Cohere elif "c4ai-command-a-03-2025" in lowered_model_name and transformers_version < Version("4.50.0.dev0"): raise RuntimeError("Unsloth: Cohere's Command model only works on transformers >= 4.50.0." + NIGHTLY) @@ -582,7 +589,8 @@ def from_pretrained( os.environ["UNSLOTH_DISABLE_STATIC_GENERATION"] = "1" # Sesame fails os.environ["UNSLOTH_FORCE_CUSTOM_DTYPE"] = \ "all;torch.float32;torch.float16;"\ - "if name.endswith(('_proj', 'fc1', 'fc2', 'codebook', 'head')): module.to(torch.float16);" + "if name.endswith(('_proj', 'fc1', 'fc2', 'codebook', 'head')): module.to(torch.float16)"\ + ";" # Granite 4 elif 'granite-4' in lowered_model_name: # granite-4 rms norms are stored as 16 bit, but we upcast @@ -594,9 +602,12 @@ def from_pretrained( # Gemma 3N elif "gemma-3n" in lowered_model_name: os.environ["UNSLOTH_DISABLE_STATIC_GENERATION"] = "1" + # Set norms to float32 since anyways they get upcasted to float32 os.environ["UNSLOTH_FORCE_CUSTOM_DTYPE"] = \ "float16;torch.float16;torch.float16;"\ - "if name.endswith(('.conv')): module;"\ + "if name.endswith('norm'): "\ + "module._pre_set_compute_dtype = torch.float32\n"\ + ";"\ "from unsloth_zoo.temporary_patches.gemma3n import patch_Gemma3nConvNormAct_forward; patch_Gemma3nConvNormAct_forward()" if transformers_version < Version("4.53.0"): @@ -606,7 +617,8 @@ def from_pretrained( # since Mamba kernels error out on using lower precision os.environ["UNSLOTH_FORCE_CUSTOM_DTYPE"] = \ "float16;torch.float32;torch.float16;"\ - "if name.endswith(('q_proj', 'k_proj', 'v_proj', 'o_proj', 'gate_proj', 'up_proj', 'down_proj', 'head')): module.to(torch.float16);"\ + "if name.endswith(('q_proj', 'k_proj', 'v_proj', 'o_proj', 'gate_proj', 'up_proj', 'down_proj', 'head')): module.to(torch.float16)"\ + ";"\ "os.environ['TRITON_F32_DEFAULT'] = 'ieee'" elif "gpt-oss" in lowered_model_name: os.environ["UNSLOTH_DISABLE_STATIC_GENERATION"] = "1" @@ -615,22 +627,31 @@ def from_pretrained( os.environ["UNSLOTH_ENABLE_CCE"] = "0" if not load_in_4bit: # Only upcast MoE biases for MXFP4, not BnB + # Set norms to float32 since anyways they get upcasted to float32 os.environ["UNSLOTH_FORCE_CUSTOM_DTYPE"] = \ "all;None;None;"\ "x = 'gate_up_proj_bias'\n"\ "if hasattr(module, x): "\ "setattr(module, x, torch.nn.Parameter(getattr(module, x).to(torch.float32)) if isinstance(getattr(module, x), torch.nn.Parameter) else getattr(module, x).to(torch.float32))\n"\ + ""\ "x = 'down_proj_bias'\n"\ "if hasattr(module, x): "\ "setattr(module, x, torch.nn.Parameter(getattr(module, x).to(torch.float32)) if isinstance(getattr(module, x), torch.nn.Parameter) else getattr(module, x).to(torch.float32))\n"\ + ""\ + "if name.endswith('norm'): "\ + "module._pre_set_compute_dtype = torch.float32\n"\ ";" else: # Set down projection compute dtype to be float32 for float16 machines + # Set norms to float32 since anyways they get upcasted to float32 os.environ["UNSLOTH_FORCE_CUSTOM_DTYPE"] = \ "all;None;None;"\ - "if 'down_projs' in name and hasattr(module, 'compute_dtype') and "\ + "if 'down_projs' in name and "\ "torch.amax(dequantize_module_weight(module)) >= 1024:"\ "module._pre_set_compute_dtype = torch.float32\n"\ + ""\ + "if name.endswith('norm'): "\ + "module._pre_set_compute_dtype = torch.float32\n"\ ";" else: for check_model_name in DISABLE_COMPILE_MODEL_NAMES: From e4bbeef2c9b56635ff20ffbaff865c26a052babc Mon Sep 17 00:00:00 2001 From: Daniel Han Date: Fri, 15 Aug 2025 19:22:19 -0700 Subject: [PATCH 028/154] Update loader.py --- unsloth/models/loader.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/unsloth/models/loader.py b/unsloth/models/loader.py index edd909abfe..86850b0253 100644 --- a/unsloth/models/loader.py +++ b/unsloth/models/loader.py @@ -646,7 +646,7 @@ def from_pretrained( # Set norms to float32 since anyways they get upcasted to float32 os.environ["UNSLOTH_FORCE_CUSTOM_DTYPE"] = \ "all;None;None;"\ - "if 'down_projs' in name and "\ + "if 'down_projs' in name and hasattr(module, 'weight') and "\ "torch.amax(dequantize_module_weight(module)) >= 1024:"\ "module._pre_set_compute_dtype = torch.float32\n"\ ""\ From c8d00bebb323700f00742dec14b1319603db7720 Mon Sep 17 00:00:00 2001 From: Daniel Han Date: Fri, 15 Aug 2025 19:25:03 -0700 Subject: [PATCH 029/154] Update vision.py --- unsloth/models/vision.py | 4 ---- 1 file changed, 4 deletions(-) diff --git a/unsloth/models/vision.py b/unsloth/models/vision.py index 4dc9cc4639..a5de457cef 100644 --- a/unsloth/models/vision.py +++ b/unsloth/models/vision.py @@ -451,7 +451,6 @@ def from_pretrained( # attn_implementation = attn_implementation, **kwargs, ) - print(model.model.layers[0].input_layernorm.weight, model.model.layers[0].input_layernorm.weight.dtype) raise_handler.remove() # Return old flag os.environ["HF_HUB_ENABLE_HF_TRANSFER"] = old_hf_transfer @@ -470,7 +469,6 @@ def from_pretrained( if DEVICE_TYPE == "cuda": torch.cuda.empty_cache() elif DEVICE_TYPE == "xpu": torch.xpu.empty_cache() pass - print(model.model.layers[0].input_layernorm.weight, model.model.layers[0].input_layernorm.weight.dtype) # Counteract saved tokenizers tokenizer_name = model_name if tokenizer_name is None else tokenizer_name @@ -507,7 +505,6 @@ def from_pretrained( tokenizer.pad_token_id = __tokenizer.pad_token_id pass # Fix other stuff like BnB compute data types - print("do_forced_float32", do_forced_float32) model, tokenizer = patch_model_and_tokenizer( model, tokenizer, @@ -518,7 +515,6 @@ def from_pretrained( ) model, tokenizer = patch_tokenizer(model, tokenizer) model = post_patch_loss_function(model) - print(model.model.layers[0].input_layernorm.weight, model.model.layers[0].input_layernorm.weight.dtype) # Log Unsloth version for future fastpaths for inference if hasattr(model, "config"): From 564b6f8cd6f73bd0f064347a0d83ab236783317e Mon Sep 17 00:00:00 2001 From: Daniel Han Date: Sat, 16 Aug 2025 23:10:15 -0700 Subject: [PATCH 030/154] Upcast layernorms --- unsloth/models/loader.py | 24 +++++++++--------------- unsloth/models/vision.py | 6 ++++++ 2 files changed, 15 insertions(+), 15 deletions(-) diff --git a/unsloth/models/loader.py b/unsloth/models/loader.py index 86850b0253..e59aef1fd0 100644 --- a/unsloth/models/loader.py +++ b/unsloth/models/loader.py @@ -575,11 +575,7 @@ def from_pretrained( if transformers_version < Version("4.50.0.dev0"): raise RuntimeError("Unsloth: Gemma 3 only works on transformers >= 4.50.0." + NIGHTLY) # Set norms to float32 since anyways they get upcasted to float32 - os.environ["UNSLOTH_FORCE_CUSTOM_DTYPE"] = \ - "all;None;None;"\ - "if name.endswith('norm'): "\ - "module._pre_set_compute_dtype = torch.float32\n"\ - ";" + os.environ["UNSLOTH_HIGH_PRECISION_LAYERNORM"] = "1" # Cohere elif "c4ai-command-a-03-2025" in lowered_model_name and transformers_version < Version("4.50.0.dev0"): raise RuntimeError("Unsloth: Cohere's Command model only works on transformers >= 4.50.0." + NIGHTLY) @@ -593,25 +589,25 @@ def from_pretrained( ";" # Granite 4 elif 'granite-4' in lowered_model_name: - # granite-4 rms norms are stored as 16 bit, but we upcast - os.environ["UNSLOTH_UPCAST_LAYERNORM"] = "1" + # Granite-4 rms norms are stored as 16 bit, but we upcast + os.environ["UNSLOTH_HIGH_PRECISION_LAYERNORM"] = "1" os.environ["UNSLOTH_DISABLE_STATIC_GENERATION"] = "1" # Olmo 2 elif "olmo-2" in lowered_model_name and transformers_version < Version("4.50.0.dev0"): raise RuntimeError("Unsloth: OLMo-2 only works on transformers >= 4.50.0." + NIGHTLY) # Gemma 3N elif "gemma-3n" in lowered_model_name: + if transformers_version < Version("4.53.0"): + raise RuntimeError("Unsloth: Gemma 3N only works on transformers >= 4.53.0" + LATEST) os.environ["UNSLOTH_DISABLE_STATIC_GENERATION"] = "1" - # Set norms to float32 since anyways they get upcasted to float32 os.environ["UNSLOTH_FORCE_CUSTOM_DTYPE"] = \ "float16;torch.float16;torch.float16;"\ "if name.endswith('norm'): "\ "module._pre_set_compute_dtype = torch.float32\n"\ ";"\ "from unsloth_zoo.temporary_patches.gemma3n import patch_Gemma3nConvNormAct_forward; patch_Gemma3nConvNormAct_forward()" - - if transformers_version < Version("4.53.0"): - raise RuntimeError("Unsloth: Gemma 3N only works on transformers >= 4.53.0" + LATEST) + # Set norms to float32 since anyways they get upcasted to float32 + os.environ["UNSLOTH_HIGH_PRECISION_LAYERNORM"] = "1" elif "falcon-h1" in lowered_model_name: # Falcon must use float32 Triton ie TRITON_F32_DEFAULT = 'ieee' # since Mamba kernels error out on using lower precision @@ -638,8 +634,6 @@ def from_pretrained( "if hasattr(module, x): "\ "setattr(module, x, torch.nn.Parameter(getattr(module, x).to(torch.float32)) if isinstance(getattr(module, x), torch.nn.Parameter) else getattr(module, x).to(torch.float32))\n"\ ""\ - "if name.endswith('norm'): "\ - "module._pre_set_compute_dtype = torch.float32\n"\ ";" else: # Set down projection compute dtype to be float32 for float16 machines @@ -650,9 +644,9 @@ def from_pretrained( "torch.amax(dequantize_module_weight(module)) >= 1024:"\ "module._pre_set_compute_dtype = torch.float32\n"\ ""\ - "if name.endswith('norm'): "\ - "module._pre_set_compute_dtype = torch.float32\n"\ ";" + # Set norms to float32 since anyways they get upcasted to float32 + os.environ["UNSLOTH_HIGH_PRECISION_LAYERNORM"] = "1" else: for check_model_name in DISABLE_COMPILE_MODEL_NAMES: if check_model_name in lowered_model_name: diff --git a/unsloth/models/vision.py b/unsloth/models/vision.py index a5de457cef..6790c5cd12 100644 --- a/unsloth/models/vision.py +++ b/unsloth/models/vision.py @@ -455,6 +455,12 @@ def from_pretrained( # Return old flag os.environ["HF_HUB_ENABLE_HF_TRANSFER"] = old_hf_transfer + # Check float32 norm weights + if os.environ.get("UNSLOTH_HIGH_PRECISION_LAYERNORM", "0") == "1": + for jj, (name, module) in enumerate(model.named_modules()): + if name.endswith("norm") and hasattr(module, "weight"): + module._pre_set_compute_dtype = torch.float32 + pass # Edit data-types if custom_datatype is not None: with torch.no_grad(): From b8a34b4a5eeeddab69320aed0097a801d7d0b1b8 Mon Sep 17 00:00:00 2001 From: Daniel Han Date: Sun, 17 Aug 2025 16:45:46 -0700 Subject: [PATCH 031/154] Update llama.py --- unsloth/models/llama.py | 1 + 1 file changed, 1 insertion(+) diff --git a/unsloth/models/llama.py b/unsloth/models/llama.py index ae03a685eb..7217c0b593 100644 --- a/unsloth/models/llama.py +++ b/unsloth/models/llama.py @@ -170,6 +170,7 @@ def needs_device_kw(fn) -> bool: if "cache_position" in kwargs: kwargs["position_ids"] = kwargs["cache_position"] + print(attention_mask) return { "input_ids" : input_ids, "attention_mask": attention_mask, **kwargs, } pass From 509fcb5ea138a7f7d29d033399b0fd0d953499e4 Mon Sep 17 00:00:00 2001 From: Daniel Han Date: Sun, 17 Aug 2025 16:55:02 -0700 Subject: [PATCH 032/154] Update llama.py --- unsloth/models/llama.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/unsloth/models/llama.py b/unsloth/models/llama.py index 7217c0b593..6beb9943e8 100644 --- a/unsloth/models/llama.py +++ b/unsloth/models/llama.py @@ -170,7 +170,6 @@ def needs_device_kw(fn) -> bool: if "cache_position" in kwargs: kwargs["position_ids"] = kwargs["cache_position"] - print(attention_mask) return { "input_ids" : input_ids, "attention_mask": attention_mask, **kwargs, } pass @@ -798,6 +797,7 @@ def LlamaModel_fast_forward( pass # Ignore attention_mask + print(attention_mask) if attention_mask is None: padding_mask = None elif self.training: From 27f1a2efc64f75eade35e5322b2278bbb1b8812a Mon Sep 17 00:00:00 2001 From: Daniel Han Date: Sun, 17 Aug 2025 17:38:42 -0700 Subject: [PATCH 033/154] Update llama.py --- unsloth/models/llama.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/unsloth/models/llama.py b/unsloth/models/llama.py index 6beb9943e8..763d69a5b8 100644 --- a/unsloth/models/llama.py +++ b/unsloth/models/llama.py @@ -797,7 +797,7 @@ def LlamaModel_fast_forward( pass # Ignore attention_mask - print(attention_mask) + print(attention_mask, attention_mask.dtype, attention_mask.shape, attention_mask[:, :, 0]) if attention_mask is None: padding_mask = None elif self.training: From 931851abfdd6fea51c72eee6afdc4809fec14bc3 Mon Sep 17 00:00:00 2001 From: Daniel Han Date: Sun, 17 Aug 2025 17:51:17 -0700 Subject: [PATCH 034/154] Update llama.py --- unsloth/models/llama.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/unsloth/models/llama.py b/unsloth/models/llama.py index 763d69a5b8..7cb39f9c77 100644 --- a/unsloth/models/llama.py +++ b/unsloth/models/llama.py @@ -797,7 +797,7 @@ def LlamaModel_fast_forward( pass # Ignore attention_mask - print(attention_mask, attention_mask.dtype, attention_mask.shape, attention_mask[:, :, 0]) + print(attention_mask, attention_mask.dtype, attention_mask.shape, attention_mask) if attention_mask is None: padding_mask = None elif self.training: From 3b9057bf81aedafba9c7d30f7e3eca80486bec07 Mon Sep 17 00:00:00 2001 From: Daniel Han Date: Sun, 17 Aug 2025 19:16:35 -0700 Subject: [PATCH 035/154] Update llama.py --- unsloth/models/llama.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/unsloth/models/llama.py b/unsloth/models/llama.py index 7cb39f9c77..4100afc60e 100644 --- a/unsloth/models/llama.py +++ b/unsloth/models/llama.py @@ -11,7 +11,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. - +global final_attention_mask import torch import gc import math @@ -797,7 +797,10 @@ def LlamaModel_fast_forward( pass # Ignore attention_mask - print(attention_mask, attention_mask.dtype, attention_mask.shape, attention_mask) + if "RAISE_ATTENTION_MASK" in os.environ: + global final_attention_mask + final_attention_mask = attention_mask + raise if attention_mask is None: padding_mask = None elif self.training: From 3dd87bb0ccc3886611f7fe60e24ec97393c47342 Mon Sep 17 00:00:00 2001 From: Daniel Han Date: Mon, 18 Aug 2025 03:10:07 -0700 Subject: [PATCH 036/154] Update llama.py --- unsloth/models/llama.py | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/unsloth/models/llama.py b/unsloth/models/llama.py index 4100afc60e..ae03a685eb 100644 --- a/unsloth/models/llama.py +++ b/unsloth/models/llama.py @@ -11,7 +11,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -global final_attention_mask + import torch import gc import math @@ -797,10 +797,6 @@ def LlamaModel_fast_forward( pass # Ignore attention_mask - if "RAISE_ATTENTION_MASK" in os.environ: - global final_attention_mask - final_attention_mask = attention_mask - raise if attention_mask is None: padding_mask = None elif self.training: From b757faf23e7c4cdbc5eee85c39f4841fd9841450 Mon Sep 17 00:00:00 2001 From: Daniel Han Date: Mon, 18 Aug 2025 05:36:47 -0700 Subject: [PATCH 037/154] Update save.py --- unsloth/save.py | 41 +++++++++++++++++++++++++++++++++++++---- 1 file changed, 37 insertions(+), 4 deletions(-) diff --git a/unsloth/save.py b/unsloth/save.py index e6d09b78fa..ef9c84e925 100644 --- a/unsloth/save.py +++ b/unsloth/save.py @@ -1195,6 +1195,41 @@ def save_to_gguf( f"--outfile {final_location} --vocab-type {vocab_type} "\ f"--outtype {first_conversion} --concurrency {n_cpus} --pad-vocab" else: + # Fix up conversion script is possible + with open(convert_location, "rb") as f: converter_latest = f.read() + # Fix metadata + converter_latest = re.sub( + rb"(self\.metadata \= .+?\(.+?\)"\ + rb"[\n]{1,}([\s]{4,}))", + rb"\1"\ + rb"if hasattr(self.metadata, 'quantized_by'): self.metadata.quantized_by = 'Unsloth'\n"\ + rb"\2if hasattr(self.metadata, 'repo_url'): self.metadata.repo_url = 'https://huggingface.co/unsloth'\n"\ + rb"\2if hasattr(self.metadata, 'tags'): self.metadata.tags = ['unsloth', 'llama.cpp']\n"\ + rb"\2", + converter_latest, + ) + + # Make mistral_common optional for now + # from x import y + converter_latest = re.sub( + rb"(from mistral_common[^\n\(]{1,})[\s]{0,}\n", + rb"try:\n \1\nexcept:\n pass\n", + converter_latest, + ) + # from x import (y, z,) + converter_latest = re.sub( + rb"(from mistral_common[^\n\(]{1,}[\s]{0,}\(.+?\))", + rb"try:\n \1\nexcept:\n pass\n", + converter_latest, + flags = re.MULTILINE | re.DOTALL, + ) + + try: + # Write file + with open(convert_location, "wb") as file: + file.write(converter_latest) + except: + pass command = f"python {convert_location} {model_directory} "\ f"--outfile {final_location} "\ f"--outtype {first_conversion}" @@ -1694,7 +1729,7 @@ def push_to_ollama_hub(username: str, model_name: str, tag: str): print(f"\nMODEL PUBLISHED FAILED WITH RETURN CODE {return_code}") else: print("\nMODEL PUBLISHED SUCCESSFULLY") - +pass def push_to_ollama( tokenizer, @@ -1726,9 +1761,7 @@ def push_to_ollama( ) print("Successfully pushed to ollama") - - - +pass def unsloth_save_pretrained_gguf( From 2e86333f332204c613a2e5636b88f0e1ef34487d Mon Sep 17 00:00:00 2001 From: Daniel Han Date: Mon, 18 Aug 2025 05:42:11 -0700 Subject: [PATCH 038/154] Update rl.py --- unsloth/models/rl.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/unsloth/models/rl.py b/unsloth/models/rl.py index e751ef5e30..b08d4eda62 100644 --- a/unsloth/models/rl.py +++ b/unsloth/models/rl.py @@ -487,6 +487,8 @@ def _patch_trl_rl_trainers(trainer_file = "grpo_trainer"): "logging_steps" : 1, "max_seq_length" : None, "num_generations" : 8, + "steps_per_generation" : 1, # Otherwise defaults to ga_steps which is wrong + "generation_batch_size" : None, # Useless. If steps_per_generation set, generation_batch_size clashes "top_k" : None, "vllm_mode" : "colocate", "generation_kwargs" : {}, From b01e948b8d351ce1a8ae41de55e8dc7a7648bc32 Mon Sep 17 00:00:00 2001 From: Daniel Han Date: Mon, 18 Aug 2025 05:44:09 -0700 Subject: [PATCH 039/154] Update pyproject.toml --- pyproject.toml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 6f6f225bde..f8558a83b6 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -37,7 +37,7 @@ triton = [ ] huggingface = [ - "unsloth_zoo>=2025.8.5", + "unsloth_zoo>=2025.8.6", "packaging", "tyro", "transformers>=4.51.3,!=4.47.0,!=4.52.0,!=4.52.1,!=4.52.2,!=4.52.3,!=4.53.0", @@ -384,7 +384,7 @@ colab-ampere-torch220 = [ "flash-attn>=2.6.3", ] colab-new = [ - "unsloth_zoo>=2025.8.5", + "unsloth_zoo>=2025.8.6", "packaging", "tyro", "transformers>=4.51.3,!=4.47.0,!=4.52.0,!=4.52.1,!=4.52.2,!=4.52.3,!=4.53.0", From a751fd789636a36ba1edd75775946a1339689e00 Mon Sep 17 00:00:00 2001 From: Daniel Han Date: Mon, 18 Aug 2025 06:07:29 -0700 Subject: [PATCH 040/154] Update rl.py --- unsloth/models/rl.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/unsloth/models/rl.py b/unsloth/models/rl.py index b08d4eda62..52b1e83694 100644 --- a/unsloth/models/rl.py +++ b/unsloth/models/rl.py @@ -487,8 +487,8 @@ def _patch_trl_rl_trainers(trainer_file = "grpo_trainer"): "logging_steps" : 1, "max_seq_length" : None, "num_generations" : 8, - "steps_per_generation" : 1, # Otherwise defaults to ga_steps which is wrong - "generation_batch_size" : None, # Useless. If steps_per_generation set, generation_batch_size clashes + # "steps_per_generation" : 1, # Otherwise defaults to ga_steps which is wrong + # "generation_batch_size" : None, # Useless. If steps_per_generation set, generation_batch_size clashes "top_k" : None, "vllm_mode" : "colocate", "generation_kwargs" : {}, From 3cb6eaf68bda8bb8bad74bd2087c6f1aa366d80e Mon Sep 17 00:00:00 2001 From: Daniel Han Date: Mon, 18 Aug 2025 06:24:30 -0700 Subject: [PATCH 041/154] Update rl_replacements.py --- unsloth/models/rl_replacements.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/unsloth/models/rl_replacements.py b/unsloth/models/rl_replacements.py index 2555f0df1f..717e6cbf11 100644 --- a/unsloth/models/rl_replacements.py +++ b/unsloth/models/rl_replacements.py @@ -556,7 +556,7 @@ def grpo_trainer_fix_batch_size(RLTrainer_source, RLConfig_source): " per_device_train_batch_size = num_generations\n" return check_batch_size pass -RL_CONFIG_CHANGES["grpo_trainer"].append(grpo_trainer_fix_batch_size) +# RL_CONFIG_CHANGES["grpo_trainer"].append(grpo_trainer_fix_batch_size) # Add other reward function names From de77a26c00cbc93050e103cf5060e54eac72b15c Mon Sep 17 00:00:00 2001 From: Daniel Han Date: Mon, 18 Aug 2025 21:02:30 -0700 Subject: [PATCH 042/154] Update rl.py --- unsloth/models/rl.py | 24 ++++++++++++++++++++++-- 1 file changed, 22 insertions(+), 2 deletions(-) diff --git a/unsloth/models/rl.py b/unsloth/models/rl.py index 52b1e83694..4dabdee639 100644 --- a/unsloth/models/rl.py +++ b/unsloth/models/rl.py @@ -133,15 +133,18 @@ class Unsloth{RLConfig_name}({RLConfig_name}): default = -1, metadata = {{'help': 'Chunk size to reduce memory usage. -1 is most efficient.'}}, ) + {max_seq_length_pre} def __init__({RLConfig_arguments}, vllm_sampling_params = None, unsloth_num_chunks = -1, + {max_seq_length_call} **kwargs, ): {RLConfig_extra_args} super().__init__({RLConfig_call_args}{RLConfig_kwargs}) self.vllm_sampling_params = vllm_sampling_params self.unsloth_num_chunks = unsloth_num_chunks + {max_seq_length_post} pass {RLTrainer_extras} @@ -266,6 +269,21 @@ def _patch_trl_rl_trainers(trainer_file = "grpo_trainer"): extra_args += mixed_precision pass + # Check if max_seq_length is NOT defined (max_length is now default) + if "max_seq_length" not in call_args and "max_length" in call_args: + max_seq_length_pre = \ + """max_seq_length : Optional[int] = field( + default = None, + metadata = {{'help': 'Maximum sequence length to truncate to.'}}, + )""" + max_seq_length_call = "max_seq_length = max_seq_length," + max_seq_length_post = "self.max_seq_length = max_seq_length" + else: + max_seq_length_pre = "" + max_seq_length_call = "" + max_seq_length_post = "" + pass + # Check if per_device_eval_batch_size (default 8) bigger than bsz # Also use FP16 / BF16 evaluation if "args" in call_args: @@ -353,9 +371,7 @@ def _patch_trl_rl_trainers(trainer_file = "grpo_trainer"): " max_length = args.max_length\n"\ " else:\n"\ " model_max_length = getattr(model, 'max_seq_length', None)\n"\ - " # print(model_max_length, 'mml1')\n"\ " if model_max_length is None: model_max_length = getattr(model, 'max_length', None)\n"\ - " # print(model_max_length, 'mml2')\n"\ " if model_max_length is not None:\n"\ " args.max_length = model_max_length\n"\ " max_length = args.max_length\n"\ @@ -666,6 +682,10 @@ def _patch_trl_rl_trainers(trainer_file = "grpo_trainer"): RLTrainer_post = RLTrainer_post, RL_pre = RL_pre, + max_seq_length_pre = max_seq_length_pre, + max_seq_length_call = max_seq_length_call, + max_seq_length_post = max_seq_length_post, + selective_log_softmax_code = selective_log_softmax_code, ) From 27ca53180d68e80818e8e40f03e85d6abd897401 Mon Sep 17 00:00:00 2001 From: Daniel Han Date: Mon, 18 Aug 2025 21:08:45 -0700 Subject: [PATCH 043/154] Update rl.py --- unsloth/models/rl.py | 30 +++++++++++++++--------------- 1 file changed, 15 insertions(+), 15 deletions(-) diff --git a/unsloth/models/rl.py b/unsloth/models/rl.py index 4dabdee639..f21bcbe4db 100644 --- a/unsloth/models/rl.py +++ b/unsloth/models/rl.py @@ -269,21 +269,6 @@ def _patch_trl_rl_trainers(trainer_file = "grpo_trainer"): extra_args += mixed_precision pass - # Check if max_seq_length is NOT defined (max_length is now default) - if "max_seq_length" not in call_args and "max_length" in call_args: - max_seq_length_pre = \ - """max_seq_length : Optional[int] = field( - default = None, - metadata = {{'help': 'Maximum sequence length to truncate to.'}}, - )""" - max_seq_length_call = "max_seq_length = max_seq_length," - max_seq_length_post = "self.max_seq_length = max_seq_length" - else: - max_seq_length_pre = "" - max_seq_length_call = "" - max_seq_length_post = "" - pass - # Check if per_device_eval_batch_size (default 8) bigger than bsz # Also use FP16 / BF16 evaluation if "args" in call_args: @@ -551,6 +536,21 @@ def _patch_trl_rl_trainers(trainer_file = "grpo_trainer"): extra_args += learning_rate_check pass + # Check if max_seq_length is NOT defined (max_length is now default) + if "max_seq_length" not in call_args and "max_length" in call_args: + max_seq_length_pre = \ + """max_seq_length : Optional[int] = field( + default = None, + metadata = {{'help': 'Maximum sequence length to truncate to.'}}, + )""" + max_seq_length_call = "max_seq_length = max_seq_length," + max_seq_length_post = "self.max_seq_length = max_seq_length" + else: + max_seq_length_pre = "" + max_seq_length_call = "" + max_seq_length_post = "" + pass + # Add output_dir saving if "output_dir" in call_args: # Default checks From 6514c8ee55baf15360f5bf840dcaf6e8cf9eeb0f Mon Sep 17 00:00:00 2001 From: Daniel Han Date: Mon, 18 Aug 2025 21:10:05 -0700 Subject: [PATCH 044/154] Update rl.py --- unsloth/models/rl.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/unsloth/models/rl.py b/unsloth/models/rl.py index f21bcbe4db..afa6b25731 100644 --- a/unsloth/models/rl.py +++ b/unsloth/models/rl.py @@ -541,7 +541,7 @@ def _patch_trl_rl_trainers(trainer_file = "grpo_trainer"): max_seq_length_pre = \ """max_seq_length : Optional[int] = field( default = None, - metadata = {{'help': 'Maximum sequence length to truncate to.'}}, + metadata = {'help': 'Maximum sequence length to truncate to.'}, )""" max_seq_length_call = "max_seq_length = max_seq_length," max_seq_length_post = "self.max_seq_length = max_seq_length" From 3e29ae7ca8fa2ef130a3dedce365d5c33a7d63b7 Mon Sep 17 00:00:00 2001 From: Daniel Han Date: Mon, 18 Aug 2025 22:41:37 -0700 Subject: [PATCH 045/154] Update _utils.py --- unsloth/models/_utils.py | 32 ++++++++++++++++++++++++++++++++ 1 file changed, 32 insertions(+) diff --git a/unsloth/models/_utils.py b/unsloth/models/_utils.py index 749becf098..dd1798f105 100644 --- a/unsloth/models/_utils.py +++ b/unsloth/models/_utils.py @@ -273,6 +273,38 @@ def filter(self, x): return not (self.text in x.getMessage()) except: pass +# Using a slow image processor as `use_fast` +try: + from transformers.processing_utils import logger as processing_utils_logger + processing_utils_logger.addFilter(HideLoggingMessage("`use_fast`")) + del processing_utils_logger +except: + pass + +# Using a slow image processor as `use_fast` +try: + from transformers.models.auto.image_processing_auto import logger as processing_utils_logger + processing_utils_logger.addFilter(HideLoggingMessage("`use_fast`")) + del processing_utils_logger +except: + pass + +# `use_cache=True` is incompatible with gradient checkpointing +try: + from transformers.trainer import logger as trainer_logger + trainer_logger.addFilter(HideLoggingMessage("`use_cache=True`")) + del trainer_logger +except: + pass + +# `use_cache=True` is incompatible with gradient checkpointing +try: + from transformers.utils.generic import logger as trainer_logger + trainer_logger.addFilter(HideLoggingMessage("`use_cache=True`")) + del trainer_logger +except: + pass + # Errors out on # Some weights of Gemma3nForConditionalGeneration were not initialized from the model checkpoint from transformers.modeling_utils import logger as transformers_logger From a42f6247d09a42ce858a4ce6af733463c2eb958b Mon Sep 17 00:00:00 2001 From: Daniel Han Date: Tue, 19 Aug 2025 02:33:58 -0700 Subject: [PATCH 046/154] Update __init__.py --- unsloth/__init__.py | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+) diff --git a/unsloth/__init__.py b/unsloth/__init__.py index 5d9ddbd43f..1055dfb3eb 100644 --- a/unsloth/__init__.py +++ b/unsloth/__init__.py @@ -12,6 +12,31 @@ # See the License for the specific language governing permissions and # limitations under the License. +try: + # Fix up AttributeError: 'MessageFactory' object has no attribute 'GetPrototype' + # MUST do this at the start primarily due to tensorflow causing issues + import google.protobuf.message_factory + class MessageFactory: + def CreatePrototype(self, *args, **kwargs): return + def GetMessages(self, *args, **kwargs): return + def GetPrototype(self, *args, **kwargs): return + if not hasattr(google.protobuf.message_factory, "MessageFactory"): + google.protobuf.message_factory.MessageFactory = MessageFactory + elif hasattr(google.protobuf.message_factory, "MessageFactory") and \ + not hasattr(google.protobuf.message_factory.MessageFactory, "GetPrototype") and \ + not hasattr(google.protobuf.message_factory, "GetMessageClass"): + google.protobuf.message_factory.MessageFactory = MessageFactory + elif hasattr(google.protobuf.message_factory, "MessageFactory") and \ + not hasattr(google.protobuf.message_factory.MessageFactory, "GetPrototype") and \ + hasattr(google.protobuf.message_factory, "GetMessageClass"): + GetMessageClass = google.protobuf.message_factory.GetMessageClass + def GetPrototype(self, descriptor): + return GetMessageClass(descriptor) + google.protobuf.message_factory.MessageFactory.GetPrototype = GetPrototype + pass +except: + pass + import warnings, importlib, sys from packaging.version import Version import os, re, subprocess, inspect From 9437f9e269d28070c2ee68abd6dce087b0cb78f4 Mon Sep 17 00:00:00 2001 From: Daniel Han Date: Tue, 19 Aug 2025 03:14:46 -0700 Subject: [PATCH 047/154] Torch 2.8 --- pyproject.toml | 112 ++++++++++++++++++++++++++++++++++++++- unsloth/_auto_install.py | 6 ++- 2 files changed, 116 insertions(+), 2 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index f8558a83b6..0462327beb 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -207,6 +207,16 @@ cu126onlytorch260 = [ "xformers @ https://download.pytorch.org/whl/cu126/xformers-0.0.29.post3-cp311-cp311-win_amd64.whl ; python_version=='3.11' and platform_system == 'Windows'", "xformers @ https://download.pytorch.org/whl/cu126/xformers-0.0.29.post3-cp312-cp312-win_amd64.whl ; python_version=='3.12' and platform_system == 'Windows'", ] +cu118onlytorch270 = [ + "xformers @ https://download.pytorch.org/whl/cu118/xformers-0.0.30-cp39-cp39-manylinux_2_28_x86_64.whl ; python_version=='3.9' and platform_system == 'Linux'", + "xformers @ https://download.pytorch.org/whl/cu118/xformers-0.0.30-cp310-cp310-manylinux_2_28_x86_64.whl ; python_version=='3.10' and platform_system == 'Linux'", + "xformers @ https://download.pytorch.org/whl/cu118/xformers-0.0.30-cp311-cp311-manylinux_2_28_x86_64.whl ; python_version=='3.11' and platform_system == 'Linux'", + "xformers @ https://download.pytorch.org/whl/cu118/xformers-0.0.30-cp312-cp312-manylinux_2_28_x86_64.whl ; python_version=='3.12' and platform_system == 'Linux'", + "xformers @ https://download.pytorch.org/whl/cu118/xformers-0.0.30-cp39-cp39-win_amd64.whl ; python_version=='3.9' and platform_system == 'Windows'", + "xformers @ https://download.pytorch.org/whl/cu118/xformers-0.0.30-cp310-cp310-win_amd64.whl ; python_version=='3.10' and platform_system == 'Windows'", + "xformers @ https://download.pytorch.org/whl/cu118/xformers-0.0.30-cp311-cp311-win_amd64.whl ; python_version=='3.11' and platform_system == 'Windows'", + "xformers @ https://download.pytorch.org/whl/cu118/xformers-0.0.30-cp312-cp312-win_amd64.whl ; python_version=='3.12' and platform_system == 'Windows'", +] cu126onlytorch270 = [ "xformers @ https://download.pytorch.org/whl/cu126/xformers-0.0.30-cp39-cp39-manylinux_2_28_x86_64.whl ; python_version=='3.9' and platform_system == 'Linux'", "xformers @ https://download.pytorch.org/whl/cu126/xformers-0.0.30-cp310-cp310-manylinux_2_28_x86_64.whl ; python_version=='3.10' and platform_system == 'Linux'", @@ -227,6 +237,30 @@ cu128onlytorch270 = [ "xformers @ https://download.pytorch.org/whl/cu128/xformers-0.0.30-cp311-cp311-win_amd64.whl ; python_version=='3.11' and platform_system == 'Windows'", "xformers @ https://download.pytorch.org/whl/cu128/xformers-0.0.30-cp312-cp312-win_amd64.whl ; python_version=='3.12' and platform_system == 'Windows'", ] +cu118onlytorch271 = [ + "xformers @ https://download.pytorch.org/whl/cu118/xformers-0.0.31.post1-cp39-abi3-manylinux_2_28_x86_64.whl ; platform_system == 'Linux'", + "xformers @ https://download.pytorch.org/whl/cu118/xformers-0.0.31.post1-cp39-abi3-win_amd64.whl ; platform_system == 'Windows'", +] +cu126onlytorch271 = [ + "xformers @ https://download.pytorch.org/whl/cu126/xformers-0.0.31.post1-cp39-abi3-manylinux_2_28_x86_64.whl ; platform_system == 'Linux'", + "xformers @ https://download.pytorch.org/whl/cu126/xformers-0.0.31.post1-cp39-abi3-win_amd64.whl ; platform_system == 'Windows'", +] +cu128onlytorch271 = [ + "xformers @ https://download.pytorch.org/whl/cu128/xformers-0.0.31.post1-cp39-abi3-manylinux_2_28_x86_64.whl ; platform_system == 'Linux'", + "xformers @ https://download.pytorch.org/whl/cu128/xformers-0.0.31.post1-cp39-abi3-win_amd64.whl ; platform_system == 'Windows'", +] +cu118onlytorch280 = [ + "xformers @ https://download.pytorch.org/whl/cu126/xformers-0.0.32.post2-cp39-abi3-manylinux_2_28_x86_64.whl ; platform_system == 'Linux'", + "xformers @ https://download.pytorch.org/whl/cu126/xformers-0.0.32.post2-cp39-abi3-win_amd64.whl ; platform_system == 'Windows'", +] +cu126onlytorch280 = [ + "xformers @ https://download.pytorch.org/whl/cu128/xformers-0.0.32.post2-cp39-abi3-manylinux_2_28_x86_64.whl ; platform_system == 'Linux'", + "xformers @ https://download.pytorch.org/whl/cu128/xformers-0.0.32.post2-cp39-abi3-win_amd64.whl ; platform_system == 'Windows'", +] +cu128onlytorch280 = [ + "xformers @ https://download.pytorch.org/whl/cu129/xformers-0.0.32.post2-cp39-abi3-manylinux_2_28_x86_64.whl ; platform_system == 'Linux'", + "xformers @ https://download.pytorch.org/whl/cu129/xformers-0.0.32.post2-cp39-abi3-win_amd64.whl ; platform_system == 'Windows'", +] cu118 = [ "unsloth[huggingface]", "bitsandbytes>=0.45.5", @@ -337,6 +371,11 @@ cu126-torch260 = [ "bitsandbytes>=0.45.5", "unsloth[cu126onlytorch260]", ] +cu118-torch270 = [ + "unsloth[huggingface]", + "bitsandbytes>=0.45.5", + "unsloth[cu118onlytorch270]", +] cu126-torch270 = [ "unsloth[huggingface]", "bitsandbytes>=0.45.5", @@ -347,6 +386,36 @@ cu128-torch270 = [ "bitsandbytes>=0.45.5", "unsloth[cu128onlytorch270]", ] +cu118-torch271 = [ + "unsloth[huggingface]", + "bitsandbytes>=0.45.5", + "unsloth[cu118onlytorch271]", +] +cu126-torch271 = [ + "unsloth[huggingface]", + "bitsandbytes>=0.45.5", + "unsloth[cu126onlytorch271]", +] +cu128-torch271 = [ + "unsloth[huggingface]", + "bitsandbytes>=0.45.5", + "unsloth[cu128onlytorch271]", +] +cu118-torch280 = [ + "unsloth[huggingface]", + "bitsandbytes>=0.45.5", + "unsloth[cu118onlytorch280]", +] +cu126-torch280 = [ + "unsloth[huggingface]", + "bitsandbytes>=0.45.5", + "unsloth[cu126onlytorch280]", +] +cu128-torch280 = [ + "unsloth[huggingface]", + "bitsandbytes>=0.45.5", + "unsloth[cu128onlytorch280]", +] kaggle = [ "unsloth[huggingface]", ] @@ -540,6 +609,12 @@ cu126-ampere-torch260 = [ "unsloth[cu126onlytorch260]", "unsloth[flashattention]", ] +cu118-ampere-torch270 = [ + "unsloth[huggingface]", + "bitsandbytes>=0.45.5", + "unsloth[cu118onlytorch270]", + "unsloth[flashattention]", +] cu126-ampere-torch270 = [ "unsloth[huggingface]", "bitsandbytes>=0.45.5", @@ -552,7 +627,42 @@ cu128-ampere-torch270 = [ "unsloth[cu128onlytorch270]", "unsloth[flashattention]", ] - +cu118-ampere-torch271 = [ + "unsloth[huggingface]", + "bitsandbytes>=0.45.5", + "unsloth[cu118onlytorch271]", + "unsloth[flashattention]", +] +cu126-ampere-torch271 = [ + "unsloth[huggingface]", + "bitsandbytes>=0.45.5", + "unsloth[cu126onlytorch271]", + "unsloth[flashattention]", +] +cu128-ampere-torch271 = [ + "unsloth[huggingface]", + "bitsandbytes>=0.45.5", + "unsloth[cu128onlytorch271]", + "unsloth[flashattention]", +] +cu118-ampere-torch280 = [ + "unsloth[huggingface]", + "bitsandbytes>=0.45.5", + "unsloth[cu118onlytorch280]", + "unsloth[flashattention]", +] +cu126-ampere-torch280 = [ + "unsloth[huggingface]", + "bitsandbytes>=0.45.5", + "unsloth[cu126onlytorch280]", + "unsloth[flashattention]", +] +cu128-ampere-torch280 = [ + "unsloth[huggingface]", + "bitsandbytes>=0.45.5", + "unsloth[cu128onlytorch280]", + "unsloth[flashattention]", +] flashattentiontorch260abiFALSEcu12x = [ "flash-attn @ https://github.com/Dao-AILab/flash-attention/releases/download/v2.7.4.post1/flash_attn-2.7.4.post1+cu12torch2.6cxx11abiFALSE-cp39-cp39-linux_x86_64.whl ; platform_system == 'Linux' and python_version == '3.9'", "flash-attn @ https://github.com/Dao-AILab/flash-attention/releases/download/v2.7.4.post1/flash_attn-2.7.4.post1+cu12torch2.6cxx11abiFALSE-cp310-cp310-linux_x86_64.whl ; platform_system == 'Linux' and python_version == '3.10'", diff --git a/unsloth/_auto_install.py b/unsloth/_auto_install.py index c8559394ed..27b23ed476 100644 --- a/unsloth/_auto_install.py +++ b/unsloth/_auto_install.py @@ -30,7 +30,11 @@ elif v < V('2.5.1'): x = 'cu{}{}-torch250' elif v <= V('2.5.1'): x = 'cu{}{}-torch251' elif v < V('2.7.0'): x = 'cu{}{}-torch260' -elif v < V('2.8.0'): x = 'cu{}{}-torch270' +elif v < V('2.7.9'): x = 'cu{}{}-torch270' +elif v < V('2.8.0'): x = 'cu{}{}-torch271' +elif v < V('2.8.9'): x = 'cu{}{}-torch280' else: raise RuntimeError(f"Torch = {v} too new!") +if v > V('2.6.9') and cuda not in ("11.8", "12.6", "12.8"): + raise RuntimeError(f"CUDA = {cuda} not supported!") x = x.format(cuda.replace(".", ""), "-ampere" if is_ampere else "") print(f'pip install --upgrade pip && pip install "unsloth[{x}] @ git+https://github.com/unslothai/unsloth.git"') \ No newline at end of file From 1dd99a2ebc8cf9b19d97ffffcc47bd27582f60cd Mon Sep 17 00:00:00 2001 From: Daniel Han Date: Tue, 19 Aug 2025 03:16:34 -0700 Subject: [PATCH 048/154] Update rl_replacements.py --- unsloth/models/rl_replacements.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/unsloth/models/rl_replacements.py b/unsloth/models/rl_replacements.py index 717e6cbf11..2555f0df1f 100644 --- a/unsloth/models/rl_replacements.py +++ b/unsloth/models/rl_replacements.py @@ -556,7 +556,7 @@ def grpo_trainer_fix_batch_size(RLTrainer_source, RLConfig_source): " per_device_train_batch_size = num_generations\n" return check_batch_size pass -# RL_CONFIG_CHANGES["grpo_trainer"].append(grpo_trainer_fix_batch_size) +RL_CONFIG_CHANGES["grpo_trainer"].append(grpo_trainer_fix_batch_size) # Add other reward function names From 5349cd0fa072105ab6904b5339b814eb7ed47b1e Mon Sep 17 00:00:00 2001 From: Daniel Han Date: Wed, 20 Aug 2025 00:10:48 -0700 Subject: [PATCH 049/154] Update loader.py --- unsloth/models/loader.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/unsloth/models/loader.py b/unsloth/models/loader.py index fae6ae0770..ce09049050 100644 --- a/unsloth/models/loader.py +++ b/unsloth/models/loader.py @@ -641,7 +641,7 @@ def from_pretrained( os.environ["UNSLOTH_FORCE_CUSTOM_DTYPE"] = \ "all;None;None;"\ "if 'down_projs' in name and hasattr(module, 'weight') and "\ - "torch.amax(dequantize_module_weight(module)) >= 1024:"\ + "torch.amax(dequantize_module_weight(module)) >= 512:"\ "module._pre_set_compute_dtype = torch.float32\n"\ ""\ ";" From 5a344c2017830ee4a8ee02e81f0383ffd8b2016f Mon Sep 17 00:00:00 2001 From: Daniel Han Date: Wed, 20 Aug 2025 00:14:49 -0700 Subject: [PATCH 050/154] UNSLOTH_ENABLE_CCE --- unsloth/__init__.py | 6 ++++++ unsloth/models/loader.py | 3 --- 2 files changed, 6 insertions(+), 3 deletions(-) diff --git a/unsloth/__init__.py b/unsloth/__init__.py index a43dc4f70f..c6851546b5 100644 --- a/unsloth/__init__.py +++ b/unsloth/__init__.py @@ -104,6 +104,12 @@ def get_device_count(): del os.environ["PYTORCH_CUDA_ALLOC_CONF"] pass +# CCE fails on Torch 2.8 and above +# OutOfResources: out of resource: shared memory, Required: 98304, Hardware limit: 65536. Reducing block sizes or `num_stages` +if (major_torch >= 2 and minor_torch >= 8) or (major_torch > 2): + os.environ["UNSLOTH_ENABLE_CCE"] = "0" +pass + # Fix Xformers performance issues since 0.0.25 import importlib.util from pathlib import Path diff --git a/unsloth/models/loader.py b/unsloth/models/loader.py index ce09049050..94fd81d16d 100644 --- a/unsloth/models/loader.py +++ b/unsloth/models/loader.py @@ -618,9 +618,6 @@ def from_pretrained( "os.environ['TRITON_F32_DEFAULT'] = 'ieee'" elif "gpt-oss" in lowered_model_name: os.environ["UNSLOTH_DISABLE_STATIC_GENERATION"] = "1" - # CCE fails on Tesla T4 - # OutOfResources: out of resource: shared memory, Required: 98304, Hardware limit: 65536. Reducing block sizes or `num_stages` - os.environ["UNSLOTH_ENABLE_CCE"] = "0" if not load_in_4bit: # Only upcast MoE biases for MXFP4, not BnB # Set norms to float32 since anyways they get upcasted to float32 From e56363c9dcd8e7e34619261871ccf798872e0fe3 Mon Sep 17 00:00:00 2001 From: Daniel Han Date: Wed, 20 Aug 2025 00:40:23 -0700 Subject: [PATCH 051/154] Fix --- unsloth/__init__.py | 2 +- unsloth/models/loader.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/unsloth/__init__.py b/unsloth/__init__.py index c6851546b5..2c72092b57 100644 --- a/unsloth/__init__.py +++ b/unsloth/__init__.py @@ -93,7 +93,7 @@ def get_device_count(): # We support Pytorch 2 # Fixes https://github.com/unslothai/unsloth/issues/38 -torch_version = str(torch.__version__).split(".") +torch_version = str(re.match(r"[0-9\.]{3,}", str(torch.__version__)).group(0)).split(".") major_torch, minor_torch = torch_version[0], torch_version[1] major_torch, minor_torch = int(major_torch), int(minor_torch) if (major_torch < 2): diff --git a/unsloth/models/loader.py b/unsloth/models/loader.py index 94fd81d16d..00e942ea93 100644 --- a/unsloth/models/loader.py +++ b/unsloth/models/loader.py @@ -638,7 +638,7 @@ def from_pretrained( os.environ["UNSLOTH_FORCE_CUSTOM_DTYPE"] = \ "all;None;None;"\ "if 'down_projs' in name and hasattr(module, 'weight') and "\ - "torch.amax(dequantize_module_weight(module)) >= 512:"\ + "torch.amax(dequantize_module_weight(module)) >= 128:"\ "module._pre_set_compute_dtype = torch.float32\n"\ ""\ ";" From c79aece5377480352b1b9eb5339d175551434745 Mon Sep 17 00:00:00 2001 From: Daniel Han Date: Wed, 20 Aug 2025 01:12:42 -0700 Subject: [PATCH 052/154] Update loader.py --- unsloth/models/loader.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/unsloth/models/loader.py b/unsloth/models/loader.py index 00e942ea93..050e077a39 100644 --- a/unsloth/models/loader.py +++ b/unsloth/models/loader.py @@ -638,7 +638,7 @@ def from_pretrained( os.environ["UNSLOTH_FORCE_CUSTOM_DTYPE"] = \ "all;None;None;"\ "if 'down_projs' in name and hasattr(module, 'weight') and "\ - "torch.amax(dequantize_module_weight(module)) >= 128:"\ + "torch.amax(dequantize_module_weight(module)) >= 0:"\ "module._pre_set_compute_dtype = torch.float32\n"\ ""\ ";" From c4b530cc29c08693ce139f4c8decdfb80aed6370 Mon Sep 17 00:00:00 2001 From: Daniel Han Date: Wed, 20 Aug 2025 01:32:44 -0700 Subject: [PATCH 053/154] Update loader.py --- unsloth/models/loader.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/unsloth/models/loader.py b/unsloth/models/loader.py index 050e077a39..0ff765bf4c 100644 --- a/unsloth/models/loader.py +++ b/unsloth/models/loader.py @@ -638,7 +638,7 @@ def from_pretrained( os.environ["UNSLOTH_FORCE_CUSTOM_DTYPE"] = \ "all;None;None;"\ "if 'down_projs' in name and hasattr(module, 'weight') and "\ - "torch.amax(dequantize_module_weight(module)) >= 0:"\ + "torch.amax(dequantize_module_weight(module)) >= 1024:"\ "module._pre_set_compute_dtype = torch.float32\n"\ ""\ ";" From 0913b585eaa4d81df1ab0d2fae09f7944f5178cb Mon Sep 17 00:00:00 2001 From: Daniel Han Date: Wed, 20 Aug 2025 01:47:06 -0700 Subject: [PATCH 054/154] Update __init__.py --- unsloth/__init__.py | 51 +++++++++++++++++++++++---------------------- 1 file changed, 26 insertions(+), 25 deletions(-) diff --git a/unsloth/__init__.py b/unsloth/__init__.py index 2c72092b57..3cb3c2e492 100644 --- a/unsloth/__init__.py +++ b/unsloth/__init__.py @@ -53,6 +53,32 @@ # Log Unsloth is being used os.environ["UNSLOTH_IS_PRESENT"] = "1" +# Fix up AttributeError: 'MessageFactory' object has no attribute 'GetPrototype' +# MUST do this at the start primarily due to tensorflow causing issues +try: + import google.protobuf.message_factory + class MessageFactory: + def CreatePrototype(self, *args, **kwargs): return + def GetMessages(self, *args, **kwargs): return + def GetPrototype(self, *args, **kwargs): return + if not hasattr(google.protobuf.message_factory, "MessageFactory"): + google.protobuf.message_factory.MessageFactory = MessageFactory + elif hasattr(google.protobuf.message_factory, "MessageFactory") and \ + not hasattr(google.protobuf.message_factory.MessageFactory, "GetPrototype") and \ + not hasattr(google.protobuf.message_factory, "GetMessageClass"): + google.protobuf.message_factory.MessageFactory = MessageFactory + elif hasattr(google.protobuf.message_factory, "MessageFactory") and \ + not hasattr(google.protobuf.message_factory.MessageFactory, "GetPrototype") and \ + hasattr(google.protobuf.message_factory, "GetMessageClass"): + GetMessageClass = google.protobuf.message_factory.GetMessageClass + def GetPrototype(self, descriptor): + return GetMessageClass(descriptor) + google.protobuf.message_factory.MessageFactory.GetPrototype = GetPrototype + pass +except: + pass + +# Try importing PyTorch and check version try: import torch except ModuleNotFoundError: @@ -246,31 +272,6 @@ def is_bf16_supported(): return SUPPORTS_BFLOAT16 raise ImportError("Unsloth: Please install unsloth_zoo via `pip install unsloth_zoo`") pass -try: - # Fix up AttributeError: 'MessageFactory' object has no attribute 'GetPrototype' - # MUST do this at the start primarily due to tensorflow causing issues - import google.protobuf.message_factory - class MessageFactory: - def CreatePrototype(self, *args, **kwargs): return - def GetMessages(self, *args, **kwargs): return - def GetPrototype(self, *args, **kwargs): return - if not hasattr(google.protobuf.message_factory, "MessageFactory"): - google.protobuf.message_factory.MessageFactory = MessageFactory - elif hasattr(google.protobuf.message_factory, "MessageFactory") and \ - not hasattr(google.protobuf.message_factory.MessageFactory, "GetPrototype") and \ - not hasattr(google.protobuf.message_factory, "GetMessageClass"): - google.protobuf.message_factory.MessageFactory = MessageFactory - elif hasattr(google.protobuf.message_factory, "MessageFactory") and \ - not hasattr(google.protobuf.message_factory.MessageFactory, "GetPrototype") and \ - hasattr(google.protobuf.message_factory, "GetMessageClass"): - GetMessageClass = google.protobuf.message_factory.GetMessageClass - def GetPrototype(self, descriptor): - return GetMessageClass(descriptor) - google.protobuf.message_factory.MessageFactory.GetPrototype = GetPrototype - pass -except: - pass - from .models import * from .models import __version__ from .save import * From 374f703ee909c56536265e1cca71306a873abd46 Mon Sep 17 00:00:00 2001 From: Daniel Han Date: Wed, 20 Aug 2025 01:49:57 -0700 Subject: [PATCH 055/154] Update __init__.py --- unsloth/__init__.py | 50 ++++++++++++++++++++++----------------------- 1 file changed, 25 insertions(+), 25 deletions(-) diff --git a/unsloth/__init__.py b/unsloth/__init__.py index 3cb3c2e492..0430e5704d 100644 --- a/unsloth/__init__.py +++ b/unsloth/__init__.py @@ -53,31 +53,6 @@ # Log Unsloth is being used os.environ["UNSLOTH_IS_PRESENT"] = "1" -# Fix up AttributeError: 'MessageFactory' object has no attribute 'GetPrototype' -# MUST do this at the start primarily due to tensorflow causing issues -try: - import google.protobuf.message_factory - class MessageFactory: - def CreatePrototype(self, *args, **kwargs): return - def GetMessages(self, *args, **kwargs): return - def GetPrototype(self, *args, **kwargs): return - if not hasattr(google.protobuf.message_factory, "MessageFactory"): - google.protobuf.message_factory.MessageFactory = MessageFactory - elif hasattr(google.protobuf.message_factory, "MessageFactory") and \ - not hasattr(google.protobuf.message_factory.MessageFactory, "GetPrototype") and \ - not hasattr(google.protobuf.message_factory, "GetMessageClass"): - google.protobuf.message_factory.MessageFactory = MessageFactory - elif hasattr(google.protobuf.message_factory, "MessageFactory") and \ - not hasattr(google.protobuf.message_factory.MessageFactory, "GetPrototype") and \ - hasattr(google.protobuf.message_factory, "GetMessageClass"): - GetMessageClass = google.protobuf.message_factory.GetMessageClass - def GetPrototype(self, descriptor): - return GetMessageClass(descriptor) - google.protobuf.message_factory.MessageFactory.GetPrototype = GetPrototype - pass -except: - pass - # Try importing PyTorch and check version try: import torch @@ -136,6 +111,31 @@ def get_device_count(): os.environ["UNSLOTH_ENABLE_CCE"] = "0" pass +# Fix up AttributeError: 'MessageFactory' object has no attribute 'GetPrototype' +# MUST do this at the start primarily due to tensorflow causing issues +try: + import google.protobuf.message_factory + class MessageFactory: + def CreatePrototype(self, *args, **kwargs): return + def GetMessages(self, *args, **kwargs): return + def GetPrototype(self, *args, **kwargs): return + if not hasattr(google.protobuf.message_factory, "MessageFactory"): + google.protobuf.message_factory.MessageFactory = MessageFactory + elif hasattr(google.protobuf.message_factory, "MessageFactory") and \ + not hasattr(google.protobuf.message_factory.MessageFactory, "GetPrototype") and \ + not hasattr(google.protobuf.message_factory, "GetMessageClass"): + google.protobuf.message_factory.MessageFactory = MessageFactory + elif hasattr(google.protobuf.message_factory, "MessageFactory") and \ + not hasattr(google.protobuf.message_factory.MessageFactory, "GetPrototype") and \ + hasattr(google.protobuf.message_factory, "GetMessageClass"): + GetMessageClass = google.protobuf.message_factory.GetMessageClass + def GetPrototype(self, descriptor): + return GetMessageClass(descriptor) + google.protobuf.message_factory.MessageFactory.GetPrototype = GetPrototype + pass +except: + pass + # Fix Xformers performance issues since 0.0.25 import importlib.util from pathlib import Path From c0efbec6918a125859e10fa8c412d42e360548be Mon Sep 17 00:00:00 2001 From: Daniel Han Date: Wed, 20 Aug 2025 01:51:18 -0700 Subject: [PATCH 056/154] Update __init__.py --- unsloth/__init__.py | 50 ++++++++++++++++++++++----------------------- 1 file changed, 25 insertions(+), 25 deletions(-) diff --git a/unsloth/__init__.py b/unsloth/__init__.py index 0430e5704d..f34645651b 100644 --- a/unsloth/__init__.py +++ b/unsloth/__init__.py @@ -111,31 +111,6 @@ def get_device_count(): os.environ["UNSLOTH_ENABLE_CCE"] = "0" pass -# Fix up AttributeError: 'MessageFactory' object has no attribute 'GetPrototype' -# MUST do this at the start primarily due to tensorflow causing issues -try: - import google.protobuf.message_factory - class MessageFactory: - def CreatePrototype(self, *args, **kwargs): return - def GetMessages(self, *args, **kwargs): return - def GetPrototype(self, *args, **kwargs): return - if not hasattr(google.protobuf.message_factory, "MessageFactory"): - google.protobuf.message_factory.MessageFactory = MessageFactory - elif hasattr(google.protobuf.message_factory, "MessageFactory") and \ - not hasattr(google.protobuf.message_factory.MessageFactory, "GetPrototype") and \ - not hasattr(google.protobuf.message_factory, "GetMessageClass"): - google.protobuf.message_factory.MessageFactory = MessageFactory - elif hasattr(google.protobuf.message_factory, "MessageFactory") and \ - not hasattr(google.protobuf.message_factory.MessageFactory, "GetPrototype") and \ - hasattr(google.protobuf.message_factory, "GetMessageClass"): - GetMessageClass = google.protobuf.message_factory.GetMessageClass - def GetPrototype(self, descriptor): - return GetMessageClass(descriptor) - google.protobuf.message_factory.MessageFactory.GetPrototype = GetPrototype - pass -except: - pass - # Fix Xformers performance issues since 0.0.25 import importlib.util from pathlib import Path @@ -272,6 +247,31 @@ def is_bf16_supported(): return SUPPORTS_BFLOAT16 raise ImportError("Unsloth: Please install unsloth_zoo via `pip install unsloth_zoo`") pass +# Fix up AttributeError: 'MessageFactory' object has no attribute 'GetPrototype' +# MUST do this at the start primarily due to tensorflow causing issues +try: + import google.protobuf.message_factory + class MessageFactory: + def CreatePrototype(self, *args, **kwargs): return + def GetMessages(self, *args, **kwargs): return + def GetPrototype(self, *args, **kwargs): return + if not hasattr(google.protobuf.message_factory, "MessageFactory"): + google.protobuf.message_factory.MessageFactory = MessageFactory + elif hasattr(google.protobuf.message_factory, "MessageFactory") and \ + not hasattr(google.protobuf.message_factory.MessageFactory, "GetPrototype") and \ + not hasattr(google.protobuf.message_factory, "GetMessageClass"): + google.protobuf.message_factory.MessageFactory = MessageFactory + elif hasattr(google.protobuf.message_factory, "MessageFactory") and \ + not hasattr(google.protobuf.message_factory.MessageFactory, "GetPrototype") and \ + hasattr(google.protobuf.message_factory, "GetMessageClass"): + GetMessageClass = google.protobuf.message_factory.GetMessageClass + def GetPrototype(self, descriptor): + return GetMessageClass(descriptor) + google.protobuf.message_factory.MessageFactory.GetPrototype = GetPrototype + pass +except: + pass + from .models import * from .models import __version__ from .save import * From 761a4454a95b3ff9a6bc28c2f4ed5619df9b828f Mon Sep 17 00:00:00 2001 From: Daniel Han Date: Wed, 20 Aug 2025 01:53:53 -0700 Subject: [PATCH 057/154] Update __init__.py --- unsloth/__init__.py | 42 +++++++++++++++++++++--------------------- 1 file changed, 21 insertions(+), 21 deletions(-) diff --git a/unsloth/__init__.py b/unsloth/__init__.py index f34645651b..95035b91b0 100644 --- a/unsloth/__init__.py +++ b/unsloth/__init__.py @@ -226,27 +226,6 @@ def is_bf16_supported(): return SUPPORTS_BFLOAT16 # TODO: check triton for intel installed properly. pass -# Check for unsloth_zoo -try: - unsloth_zoo_version = importlib_version("unsloth_zoo") - if Version(unsloth_zoo_version) < Version("2025.8.1"): - print( - "Unsloth: Please update Unsloth and Unsloth-Zoo to the latest version!\n"\ - "Do this via `pip install --upgrade --force-reinstall --no-cache-dir --no-deps unsloth unsloth_zoo`" - ) - # if os.environ.get("UNSLOTH_DISABLE_AUTO_UPDATES", "0") == "0": - # try: - # os.system("pip install --upgrade --no-cache-dir --no-deps unsloth_zoo") - # except: - # try: - # os.system("pip install --upgrade --no-cache-dir --no-deps --user unsloth_zoo") - # except: - # raise ImportError("Unsloth: Please update unsloth_zoo via `pip install --upgrade --no-cache-dir --no-deps unsloth_zoo`") - import unsloth_zoo -except: - raise ImportError("Unsloth: Please install unsloth_zoo via `pip install unsloth_zoo`") -pass - # Fix up AttributeError: 'MessageFactory' object has no attribute 'GetPrototype' # MUST do this at the start primarily due to tensorflow causing issues try: @@ -272,6 +251,27 @@ def GetPrototype(self, descriptor): except: pass +# Check for unsloth_zoo +try: + unsloth_zoo_version = importlib_version("unsloth_zoo") + if Version(unsloth_zoo_version) < Version("2025.8.1"): + print( + "Unsloth: Please update Unsloth and Unsloth-Zoo to the latest version!\n"\ + "Do this via `pip install --upgrade --force-reinstall --no-cache-dir --no-deps unsloth unsloth_zoo`" + ) + # if os.environ.get("UNSLOTH_DISABLE_AUTO_UPDATES", "0") == "0": + # try: + # os.system("pip install --upgrade --no-cache-dir --no-deps unsloth_zoo") + # except: + # try: + # os.system("pip install --upgrade --no-cache-dir --no-deps --user unsloth_zoo") + # except: + # raise ImportError("Unsloth: Please update unsloth_zoo via `pip install --upgrade --no-cache-dir --no-deps unsloth_zoo`") + import unsloth_zoo +except: + raise ImportError("Unsloth: Please install unsloth_zoo via `pip install unsloth_zoo`") +pass + from .models import * from .models import __version__ from .save import * From 30ea44c17f2b4e60b77240c1cb1ec93610c57861 Mon Sep 17 00:00:00 2001 From: Daniel Han Date: Wed, 20 Aug 2025 01:56:58 -0700 Subject: [PATCH 058/154] Import fixes --- unsloth/__init__.py | 30 ++++-------------------------- unsloth/import_fixes.py | 40 ++++++++++++++++++++++++++++++++++++++++ 2 files changed, 44 insertions(+), 26 deletions(-) create mode 100644 unsloth/import_fixes.py diff --git a/unsloth/__init__.py b/unsloth/__init__.py index 95035b91b0..fd6bd7d499 100644 --- a/unsloth/__init__.py +++ b/unsloth/__init__.py @@ -17,6 +17,10 @@ import os, re, subprocess, inspect import numpy as np +# Fix some issues before importing other packages +from .import_fixes import fix_message_factory_issue +fix_message_factory_issue(); del fix_message_factory_issue; + # Check if modules that need patching are already imported critical_modules = ['trl', 'transformers', 'peft'] already_imported = [mod for mod in critical_modules if mod in sys.modules] @@ -161,7 +165,6 @@ def is_bf16_supported(): return SUPPORTS_BFLOAT16 SUPPORTS_BFLOAT16 = torch.xpu.is_bf16_supported() pass - # For Gradio HF Spaces? # if "SPACE_AUTHOR_NAME" not in os.environ and "SPACE_REPO_NAME" not in os.environ: import triton @@ -226,31 +229,6 @@ def is_bf16_supported(): return SUPPORTS_BFLOAT16 # TODO: check triton for intel installed properly. pass -# Fix up AttributeError: 'MessageFactory' object has no attribute 'GetPrototype' -# MUST do this at the start primarily due to tensorflow causing issues -try: - import google.protobuf.message_factory - class MessageFactory: - def CreatePrototype(self, *args, **kwargs): return - def GetMessages(self, *args, **kwargs): return - def GetPrototype(self, *args, **kwargs): return - if not hasattr(google.protobuf.message_factory, "MessageFactory"): - google.protobuf.message_factory.MessageFactory = MessageFactory - elif hasattr(google.protobuf.message_factory, "MessageFactory") and \ - not hasattr(google.protobuf.message_factory.MessageFactory, "GetPrototype") and \ - not hasattr(google.protobuf.message_factory, "GetMessageClass"): - google.protobuf.message_factory.MessageFactory = MessageFactory - elif hasattr(google.protobuf.message_factory, "MessageFactory") and \ - not hasattr(google.protobuf.message_factory.MessageFactory, "GetPrototype") and \ - hasattr(google.protobuf.message_factory, "GetMessageClass"): - GetMessageClass = google.protobuf.message_factory.GetMessageClass - def GetPrototype(self, descriptor): - return GetMessageClass(descriptor) - google.protobuf.message_factory.MessageFactory.GetPrototype = GetPrototype - pass -except: - pass - # Check for unsloth_zoo try: unsloth_zoo_version = importlib_version("unsloth_zoo") diff --git a/unsloth/import_fixes.py b/unsloth/import_fixes.py new file mode 100644 index 0000000000..d265a09df0 --- /dev/null +++ b/unsloth/import_fixes.py @@ -0,0 +1,40 @@ +# Copyright 2023-present Daniel Han-Chen & the Unsloth team. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +def fix_message_factory_issue(): + # Fix up AttributeError: 'MessageFactory' object has no attribute 'GetPrototype' + # MUST do this at the start primarily due to tensorflow causing issues + try: + import google.protobuf.message_factory + class MessageFactory: + def CreatePrototype(self, *args, **kwargs): return + def GetMessages(self, *args, **kwargs): return + def GetPrototype(self, *args, **kwargs): return + if not hasattr(google.protobuf.message_factory, "MessageFactory"): + google.protobuf.message_factory.MessageFactory = MessageFactory + elif hasattr(google.protobuf.message_factory, "MessageFactory") and \ + not hasattr(google.protobuf.message_factory.MessageFactory, "GetPrototype") and \ + not hasattr(google.protobuf.message_factory, "GetMessageClass"): + google.protobuf.message_factory.MessageFactory = MessageFactory + elif hasattr(google.protobuf.message_factory, "MessageFactory") and \ + not hasattr(google.protobuf.message_factory.MessageFactory, "GetPrototype") and \ + hasattr(google.protobuf.message_factory, "GetMessageClass"): + GetMessageClass = google.protobuf.message_factory.GetMessageClass + def GetPrototype(self, descriptor): + return GetMessageClass(descriptor) + google.protobuf.message_factory.MessageFactory.GetPrototype = GetPrototype + pass + except: + pass +pass From c45467cfd91d5d66308f5cbc8a6ab3cc90bec5d5 Mon Sep 17 00:00:00 2001 From: Daniel Han Date: Wed, 20 Aug 2025 02:00:51 -0700 Subject: [PATCH 059/154] Update loader.py --- unsloth/models/loader.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/unsloth/models/loader.py b/unsloth/models/loader.py index 0ff765bf4c..72655782f9 100644 --- a/unsloth/models/loader.py +++ b/unsloth/models/loader.py @@ -637,8 +637,8 @@ def from_pretrained( # Set norms to float32 since anyways they get upcasted to float32 os.environ["UNSLOTH_FORCE_CUSTOM_DTYPE"] = \ "all;None;None;"\ - "if 'down_projs' in name and hasattr(module, 'weight') and "\ - "torch.amax(dequantize_module_weight(module)) >= 1024:"\ + "if hasattr(module, 'weight') and "\ + "torch.amax(dequantize_module_weight(module)) >= 1:"\ "module._pre_set_compute_dtype = torch.float32\n"\ ""\ ";" From 55e4c78a943a52b9e0b46b29afae0f79e371573c Mon Sep 17 00:00:00 2001 From: Daniel Han Date: Wed, 20 Aug 2025 02:15:10 -0700 Subject: [PATCH 060/154] Fix aimv2 issue --- unsloth/__init__.py | 30 +++------------- unsloth/import_fixes.py | 79 +++++++++++++++++++++++++++++++++++++++-- 2 files changed, 82 insertions(+), 27 deletions(-) diff --git a/unsloth/__init__.py b/unsloth/__init__.py index fd6bd7d499..335db48775 100644 --- a/unsloth/__init__.py +++ b/unsloth/__init__.py @@ -115,35 +115,15 @@ def get_device_count(): os.environ["UNSLOTH_ENABLE_CCE"] = "0" pass -# Fix Xformers performance issues since 0.0.25 +# Fix other issues import importlib.util from pathlib import Path from importlib.metadata import version as importlib_version from packaging.version import Version -try: - xformers_version = importlib_version("xformers") - if Version(xformers_version) < Version("0.0.29"): - xformers_location = importlib.util.find_spec("xformers").origin - xformers_location = os.path.split(xformers_location)[0] - cutlass = Path(xformers_location) / "ops" / "fmha" / "cutlass.py" - - if cutlass.exists(): - with open(cutlass, "r+", encoding = "utf-8") as f: - text = f.read() - # See https://github.com/facebookresearch/xformers/issues/1176#issuecomment-2545829591 - if "num_splits_key=-1," in text: - text = text.replace("num_splits_key=-1,", "num_splits_key=None,") - f.seek(0) - f.write(text) - f.truncate() - print("Unsloth: Patching Xformers to fix some performance issues.") - pass - pass - pass - pass -except: - pass -pass +from .import_fixes import fix_xformers_performance_issue +fix_xformers_performance_issue(); del fix_xformers_performance_issue; +from .import_fixes import fix_vllm_aimv2_issue +fix_vllm_aimv2_issue(); del fix_vllm_aimv2_issue; # Torch 2.4 has including_emulation if DEVICE_TYPE == "cuda": diff --git a/unsloth/import_fixes.py b/unsloth/import_fixes.py index d265a09df0..126aac6365 100644 --- a/unsloth/import_fixes.py +++ b/unsloth/import_fixes.py @@ -12,9 +12,16 @@ # See the License for the specific language governing permissions and # limitations under the License. +import os +import importlib.util +from pathlib import Path +from importlib.metadata import version as importlib_version +from packaging.version import Version +UNSLOTH_ENABLE_LOGGING = os.environ.get("UNSLOTH_ENABLE_LOGGING", "0") == "1" + +# Fix up AttributeError: 'MessageFactory' object has no attribute 'GetPrototype' +# MUST do this at the start primarily due to tensorflow causing issues def fix_message_factory_issue(): - # Fix up AttributeError: 'MessageFactory' object has no attribute 'GetPrototype' - # MUST do this at the start primarily due to tensorflow causing issues try: import google.protobuf.message_factory class MessageFactory: @@ -22,11 +29,15 @@ def CreatePrototype(self, *args, **kwargs): return def GetMessages(self, *args, **kwargs): return def GetPrototype(self, *args, **kwargs): return if not hasattr(google.protobuf.message_factory, "MessageFactory"): + if UNSLOTH_ENABLE_LOGGING: + print("Unsloth: Patching protobuf.MessageFactory as it doesn't exist") google.protobuf.message_factory.MessageFactory = MessageFactory elif hasattr(google.protobuf.message_factory, "MessageFactory") and \ not hasattr(google.protobuf.message_factory.MessageFactory, "GetPrototype") and \ not hasattr(google.protobuf.message_factory, "GetMessageClass"): google.protobuf.message_factory.MessageFactory = MessageFactory + if UNSLOTH_ENABLE_LOGGING: + print("Unsloth: Patching protobuf.MessageFactory as it doesn't exist") elif hasattr(google.protobuf.message_factory, "MessageFactory") and \ not hasattr(google.protobuf.message_factory.MessageFactory, "GetPrototype") and \ hasattr(google.protobuf.message_factory, "GetMessageClass"): @@ -34,7 +45,71 @@ def GetPrototype(self, *args, **kwargs): return def GetPrototype(self, descriptor): return GetMessageClass(descriptor) google.protobuf.message_factory.MessageFactory.GetPrototype = GetPrototype + if UNSLOTH_ENABLE_LOGGING: + print("Unsloth: Patching protobuf.MessageFactory.GetPrototype") pass except: pass pass + +# Fix Xformers performance issues since 0.0.25 +def fix_xformers_performance_issue(): + xformers_version = importlib_version("xformers") + if Version(xformers_version) < Version("0.0.29"): + xformers_location = importlib.util.find_spec("xformers").origin + xformers_location = os.path.split(xformers_location)[0] + cutlass = Path(xformers_location) / "ops" / "fmha" / "cutlass.py" + try: + if cutlass.exists(): + with open(cutlass, "r+", encoding = "utf-8") as f: + text = f.read() + # See https://github.com/facebookresearch/xformers/issues/1176#issuecomment-2545829591 + if "num_splits_key=-1," in text: + text = text.replace( + "num_splits_key=-1,", + "num_splits_key=None,", + ) + f.seek(0) + f.write(text) + f.truncate() + if UNSLOTH_ENABLE_LOGGING: + print("Unsloth: Patching Xformers to fix some performance issues.") + except: + pass +pass + +# ValueError: 'aimv2' is already used by a Transformers config, pick another name. +def fix_vllm_aimv2_issue(): + vllm_version = importlib_version("vllm") + if Version(vllm_version) < Version("0.10.1"): + vllm_version = importlib.util.find_spec("xformers").origin + vllm_version = os.path.split(vllm_version)[0] + ovis_config = Path(vllm_version) / "transformers_utils" / "configs" / "ovis.py" + try: + if ovis_config.exists(): + with open(ovis_config, "r+", encoding = "utf-8") as f: + text = f.read() + # See https://github.com/vllm-project/vllm-ascend/issues/2046 + if 'AutoConfig.register("aimv2", AIMv2Config)' in text: + text = text.replace( + 'AutoConfig.register("aimv2", AIMv2Config)', + '', + ) + text = text.replace( + '''backbone_config.pop('model_type') + backbone_config = AutoConfig.for_model(model_type, + **backbone_config)''', + '''if model_type != "aimv2": + backbone_config.pop('model_type') + backbone_config = AutoConfig.for_model(model_type, **backbone_config) + else: + backbone_config = AIMv2Config(**backbone_config)''' + ) + f.seek(0) + f.write(text) + f.truncate() + if UNSLOTH_ENABLE_LOGGING: + print("Unsloth: Patching vLLM to fix `'aimv2' is already used by a Transformers config, pick another name.`") + except: + pass +pass From a160e42ad8250f40b25e72e2a1b2e2d550986a65 Mon Sep 17 00:00:00 2001 From: Daniel Han Date: Wed, 20 Aug 2025 02:20:31 -0700 Subject: [PATCH 061/154] Update loader.py --- unsloth/models/loader.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/unsloth/models/loader.py b/unsloth/models/loader.py index 72655782f9..0ff765bf4c 100644 --- a/unsloth/models/loader.py +++ b/unsloth/models/loader.py @@ -637,8 +637,8 @@ def from_pretrained( # Set norms to float32 since anyways they get upcasted to float32 os.environ["UNSLOTH_FORCE_CUSTOM_DTYPE"] = \ "all;None;None;"\ - "if hasattr(module, 'weight') and "\ - "torch.amax(dequantize_module_weight(module)) >= 1:"\ + "if 'down_projs' in name and hasattr(module, 'weight') and "\ + "torch.amax(dequantize_module_weight(module)) >= 1024:"\ "module._pre_set_compute_dtype = torch.float32\n"\ ""\ ";" From 675c4effe78a3ef5bb3f21f6892f3edc54e1e935 Mon Sep 17 00:00:00 2001 From: Daniel Han Date: Wed, 20 Aug 2025 02:23:21 -0700 Subject: [PATCH 062/154] Update import_fixes.py --- unsloth/import_fixes.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/unsloth/import_fixes.py b/unsloth/import_fixes.py index 126aac6365..1a4172e01f 100644 --- a/unsloth/import_fixes.py +++ b/unsloth/import_fixes.py @@ -54,6 +54,7 @@ def GetPrototype(self, descriptor): # Fix Xformers performance issues since 0.0.25 def fix_xformers_performance_issue(): + if importlib.util.find_spec("xformers") is None: return xformers_version = importlib_version("xformers") if Version(xformers_version) < Version("0.0.29"): xformers_location = importlib.util.find_spec("xformers").origin @@ -80,6 +81,7 @@ def fix_xformers_performance_issue(): # ValueError: 'aimv2' is already used by a Transformers config, pick another name. def fix_vllm_aimv2_issue(): + if importlib.util.find_spec("vllm") is None: return vllm_version = importlib_version("vllm") if Version(vllm_version) < Version("0.10.1"): vllm_version = importlib.util.find_spec("xformers").origin From a99d6b273c59f0908385559ba2d8b441751b6249 Mon Sep 17 00:00:00 2001 From: Daniel Han Date: Wed, 20 Aug 2025 02:26:23 -0700 Subject: [PATCH 063/154] Update import_fixes.py --- unsloth/import_fixes.py | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/unsloth/import_fixes.py b/unsloth/import_fixes.py index 1a4172e01f..a07f9970f8 100644 --- a/unsloth/import_fixes.py +++ b/unsloth/import_fixes.py @@ -75,8 +75,9 @@ def fix_xformers_performance_issue(): f.truncate() if UNSLOTH_ENABLE_LOGGING: print("Unsloth: Patching Xformers to fix some performance issues.") - except: - pass + except Exception as e: + if UNSLOTH_ENABLE_LOGGING: + print(f"Unsloth: Failed patching Xformers with error = {str(e)}") pass # ValueError: 'aimv2' is already used by a Transformers config, pick another name. @@ -84,7 +85,7 @@ def fix_vllm_aimv2_issue(): if importlib.util.find_spec("vllm") is None: return vllm_version = importlib_version("vllm") if Version(vllm_version) < Version("0.10.1"): - vllm_version = importlib.util.find_spec("xformers").origin + vllm_version = importlib.util.find_spec("vllm").origin vllm_version = os.path.split(vllm_version)[0] ovis_config = Path(vllm_version) / "transformers_utils" / "configs" / "ovis.py" try: @@ -112,6 +113,7 @@ def fix_vllm_aimv2_issue(): f.truncate() if UNSLOTH_ENABLE_LOGGING: print("Unsloth: Patching vLLM to fix `'aimv2' is already used by a Transformers config, pick another name.`") - except: - pass + except Exception as e: + if UNSLOTH_ENABLE_LOGGING: + print(f"Unsloth: Failed patching vLLM with error = {str(e)}") pass From 7e8262303ef06bc39367a17acf0e783abb37c1b4 Mon Sep 17 00:00:00 2001 From: Daniel Han Date: Wed, 20 Aug 2025 02:38:39 -0700 Subject: [PATCH 064/154] Update loader.py --- unsloth/models/loader.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/unsloth/models/loader.py b/unsloth/models/loader.py index 0ff765bf4c..050e077a39 100644 --- a/unsloth/models/loader.py +++ b/unsloth/models/loader.py @@ -638,7 +638,7 @@ def from_pretrained( os.environ["UNSLOTH_FORCE_CUSTOM_DTYPE"] = \ "all;None;None;"\ "if 'down_projs' in name and hasattr(module, 'weight') and "\ - "torch.amax(dequantize_module_weight(module)) >= 1024:"\ + "torch.amax(dequantize_module_weight(module)) >= 0:"\ "module._pre_set_compute_dtype = torch.float32\n"\ ""\ ";" From 0e678d6fe9ef0aeced0380184bfb9e7c9b1a1778 Mon Sep 17 00:00:00 2001 From: Daniel Han Date: Wed, 20 Aug 2025 03:38:26 -0700 Subject: [PATCH 065/154] Update loader.py --- unsloth/models/loader.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/unsloth/models/loader.py b/unsloth/models/loader.py index 050e077a39..1b110ca513 100644 --- a/unsloth/models/loader.py +++ b/unsloth/models/loader.py @@ -637,7 +637,7 @@ def from_pretrained( # Set norms to float32 since anyways they get upcasted to float32 os.environ["UNSLOTH_FORCE_CUSTOM_DTYPE"] = \ "all;None;None;"\ - "if 'down_projs' in name and hasattr(module, 'weight') and "\ + "if ('down_projs' in name or 'gate_up_projs' in name) and hasattr(module, 'weight') and "\ "torch.amax(dequantize_module_weight(module)) >= 0:"\ "module._pre_set_compute_dtype = torch.float32\n"\ ""\ From 9b82317a699779d8b96e986fe8ef7a3f16494247 Mon Sep 17 00:00:00 2001 From: Daniel Han Date: Wed, 20 Aug 2025 04:09:24 -0700 Subject: [PATCH 066/154] Update loader.py --- unsloth/models/loader.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/unsloth/models/loader.py b/unsloth/models/loader.py index 1b110ca513..0da6b83d12 100644 --- a/unsloth/models/loader.py +++ b/unsloth/models/loader.py @@ -637,8 +637,8 @@ def from_pretrained( # Set norms to float32 since anyways they get upcasted to float32 os.environ["UNSLOTH_FORCE_CUSTOM_DTYPE"] = \ "all;None;None;"\ - "if ('down_projs' in name or 'gate_up_projs' in name) and hasattr(module, 'weight') and "\ - "torch.amax(dequantize_module_weight(module)) >= 0:"\ + "if ('down_projs' in name) and hasattr(module, 'weight') and "\ + "torch.amax(dequantize_module_weight(module)) >= 1024:"\ "module._pre_set_compute_dtype = torch.float32\n"\ ""\ ";" From 8a76fd32bdf05d3e63dd6df309b52d861e11ef3f Mon Sep 17 00:00:00 2001 From: Daniel Han Date: Wed, 20 Aug 2025 04:39:30 -0700 Subject: [PATCH 067/154] Upgrade --- pyproject.toml | 4 ++-- unsloth/__init__.py | 2 +- unsloth/models/_utils.py | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index c4c3ebe6f5..83b75b0a00 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -37,7 +37,7 @@ triton = [ ] huggingface = [ - "unsloth_zoo>=2025.8.7", + "unsloth_zoo>=2025.8.8", "packaging", "tyro", "transformers>=4.51.3,!=4.47.0,!=4.52.0,!=4.52.1,!=4.52.2,!=4.52.3,!=4.53.0,!=4.54.0,!=4.55.0,!=4.55.1", @@ -453,7 +453,7 @@ colab-ampere-torch220 = [ "flash-attn>=2.6.3", ] colab-new = [ - "unsloth_zoo>=2025.8.7", + "unsloth_zoo>=2025.8.8", "packaging", "tyro", "transformers>=4.51.3,!=4.47.0,!=4.52.0,!=4.52.1,!=4.52.2,!=4.52.3,!=4.53.0,!=4.54.0,!=4.55.0,!=4.55.1", diff --git a/unsloth/__init__.py b/unsloth/__init__.py index 335db48775..a6ea8f4c9f 100644 --- a/unsloth/__init__.py +++ b/unsloth/__init__.py @@ -212,7 +212,7 @@ def is_bf16_supported(): return SUPPORTS_BFLOAT16 # Check for unsloth_zoo try: unsloth_zoo_version = importlib_version("unsloth_zoo") - if Version(unsloth_zoo_version) < Version("2025.8.1"): + if Version(unsloth_zoo_version) < Version("2025.8.8"): print( "Unsloth: Please update Unsloth and Unsloth-Zoo to the latest version!\n"\ "Do this via `pip install --upgrade --force-reinstall --no-cache-dir --no-deps unsloth unsloth_zoo`" diff --git a/unsloth/models/_utils.py b/unsloth/models/_utils.py index 85f1a9a960..fde776a5e6 100644 --- a/unsloth/models/_utils.py +++ b/unsloth/models/_utils.py @@ -12,7 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. -__version__ = "2025.8.8" +__version__ = "2025.8.9" __all__ = [ "SUPPORTS_BFLOAT16", From 94bcb28818558f7de378ef4356b5ac6651e545fa Mon Sep 17 00:00:00 2001 From: Daniel Han Date: Wed, 20 Aug 2025 04:40:24 -0700 Subject: [PATCH 068/154] Update loader.py --- unsloth/models/loader.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/unsloth/models/loader.py b/unsloth/models/loader.py index 0da6b83d12..54d2fa2ce6 100644 --- a/unsloth/models/loader.py +++ b/unsloth/models/loader.py @@ -638,7 +638,7 @@ def from_pretrained( os.environ["UNSLOTH_FORCE_CUSTOM_DTYPE"] = \ "all;None;None;"\ "if ('down_projs' in name) and hasattr(module, 'weight') and "\ - "torch.amax(dequantize_module_weight(module)) >= 1024:"\ + "torch.amax(dequantize_module_weight(module)) >= 0:"\ "module._pre_set_compute_dtype = torch.float32\n"\ ""\ ";" From 7d7a1156843603b2b283f77e283801feffbb0ac6 Mon Sep 17 00:00:00 2001 From: Daniel Han Date: Wed, 20 Aug 2025 05:17:37 -0700 Subject: [PATCH 069/154] Update loader.py --- unsloth/models/loader.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/unsloth/models/loader.py b/unsloth/models/loader.py index 54d2fa2ce6..878a7c4a4c 100644 --- a/unsloth/models/loader.py +++ b/unsloth/models/loader.py @@ -637,7 +637,7 @@ def from_pretrained( # Set norms to float32 since anyways they get upcasted to float32 os.environ["UNSLOTH_FORCE_CUSTOM_DTYPE"] = \ "all;None;None;"\ - "if ('down_projs' in name) and hasattr(module, 'weight') and "\ + "if hasattr(module, 'weight') and "\ "torch.amax(dequantize_module_weight(module)) >= 0:"\ "module._pre_set_compute_dtype = torch.float32\n"\ ""\ From 031f5e12487786462fc2f0306ff6792697b2dec7 Mon Sep 17 00:00:00 2001 From: Daniel Han Date: Wed, 20 Aug 2025 07:20:46 -0700 Subject: [PATCH 070/154] Update loader.py --- unsloth/models/loader.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/unsloth/models/loader.py b/unsloth/models/loader.py index 878a7c4a4c..3af8200ebb 100644 --- a/unsloth/models/loader.py +++ b/unsloth/models/loader.py @@ -636,8 +636,8 @@ def from_pretrained( # Set down projection compute dtype to be float32 for float16 machines # Set norms to float32 since anyways they get upcasted to float32 os.environ["UNSLOTH_FORCE_CUSTOM_DTYPE"] = \ - "all;None;None;"\ - "if hasattr(module, 'weight') and "\ + "torch.float16;torch.bfloat16;torch.bfloat16;"\ + "if ('down_projs' in name) and hasattr(module, 'weight') and "\ "torch.amax(dequantize_module_weight(module)) >= 0:"\ "module._pre_set_compute_dtype = torch.float32\n"\ ""\ From 98bee64be03b6988613e2e3b1dbc5013bff3242b Mon Sep 17 00:00:00 2001 From: Daniel Han Date: Wed, 20 Aug 2025 07:34:42 -0700 Subject: [PATCH 071/154] Update loader.py --- unsloth/models/loader.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/unsloth/models/loader.py b/unsloth/models/loader.py index 3af8200ebb..3aed8654f8 100644 --- a/unsloth/models/loader.py +++ b/unsloth/models/loader.py @@ -636,11 +636,13 @@ def from_pretrained( # Set down projection compute dtype to be float32 for float16 machines # Set norms to float32 since anyways they get upcasted to float32 os.environ["UNSLOTH_FORCE_CUSTOM_DTYPE"] = \ - "torch.float16;torch.bfloat16;torch.bfloat16;"\ + "torch.float16;torch.bfloat16;torch.float16;"\ "if ('down_projs' in name) and hasattr(module, 'weight') and "\ "torch.amax(dequantize_module_weight(module)) >= 0:"\ "module._pre_set_compute_dtype = torch.float32\n"\ ""\ + "if ('mlp.router' in name) and hasattr(module, 'weight'):"\ + "module._pre_set_compute_dtype = torch.float32\n"\ ";" # Set norms to float32 since anyways they get upcasted to float32 os.environ["UNSLOTH_HIGH_PRECISION_LAYERNORM"] = "1" From 2ba900880d41c43e5322837d046f00425f3a249c Mon Sep 17 00:00:00 2001 From: Daniel Han Date: Wed, 20 Aug 2025 17:24:53 -0700 Subject: [PATCH 072/154] Update vision.py --- unsloth/models/vision.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/unsloth/models/vision.py b/unsloth/models/vision.py index 6790c5cd12..2d3e0a2002 100644 --- a/unsloth/models/vision.py +++ b/unsloth/models/vision.py @@ -245,6 +245,7 @@ def unsloth_base_fast_generate( return output pass +global partial_model class FastBaseModel: @@ -454,6 +455,9 @@ def from_pretrained( raise_handler.remove() # Return old flag os.environ["HF_HUB_ENABLE_HF_TRANSFER"] = old_hf_transfer + global partial_model + partial_model = model + raise # Check float32 norm weights if os.environ.get("UNSLOTH_HIGH_PRECISION_LAYERNORM", "0") == "1": From ea435e6d06712d59ebe00f8e23c86edacc96173a Mon Sep 17 00:00:00 2001 From: Daniel Han Date: Wed, 20 Aug 2025 18:17:55 -0700 Subject: [PATCH 073/154] Update vision.py --- unsloth/models/vision.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/unsloth/models/vision.py b/unsloth/models/vision.py index 2d3e0a2002..a61337b791 100644 --- a/unsloth/models/vision.py +++ b/unsloth/models/vision.py @@ -455,9 +455,6 @@ def from_pretrained( raise_handler.remove() # Return old flag os.environ["HF_HUB_ENABLE_HF_TRANSFER"] = old_hf_transfer - global partial_model - partial_model = model - raise # Check float32 norm weights if os.environ.get("UNSLOTH_HIGH_PRECISION_LAYERNORM", "0") == "1": @@ -525,6 +522,9 @@ def from_pretrained( ) model, tokenizer = patch_tokenizer(model, tokenizer) model = post_patch_loss_function(model) + global partial_model + partial_model = model + raise # Log Unsloth version for future fastpaths for inference if hasattr(model, "config"): From 5bebfa9f37b933a3b000a5aa3f22448ac8fde7c0 Mon Sep 17 00:00:00 2001 From: Daniel Han Date: Wed, 20 Aug 2025 19:13:42 -0700 Subject: [PATCH 074/154] custom_datatype --- unsloth/models/loader.py | 2 +- unsloth/models/vision.py | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/unsloth/models/loader.py b/unsloth/models/loader.py index 3aed8654f8..9ab990133c 100644 --- a/unsloth/models/loader.py +++ b/unsloth/models/loader.py @@ -640,7 +640,7 @@ def from_pretrained( "if ('down_projs' in name) and hasattr(module, 'weight') and "\ "torch.amax(dequantize_module_weight(module)) >= 0:"\ "module._pre_set_compute_dtype = torch.float32\n"\ - ""\ + "\n"\ "if ('mlp.router' in name) and hasattr(module, 'weight'):"\ "module._pre_set_compute_dtype = torch.float32\n"\ ";" diff --git a/unsloth/models/vision.py b/unsloth/models/vision.py index a61337b791..c57fd80ef5 100644 --- a/unsloth/models/vision.py +++ b/unsloth/models/vision.py @@ -463,6 +463,7 @@ def from_pretrained( module._pre_set_compute_dtype = torch.float32 pass # Edit data-types + print("custom_datatype", custom_datatype) if custom_datatype is not None: with torch.no_grad(): for jj, (name, module) in enumerate(model.named_modules()): From 356789a65805931f09ffca007227d203f19d1ebc Mon Sep 17 00:00:00 2001 From: Daniel Han Date: Wed, 20 Aug 2025 19:18:03 -0700 Subject: [PATCH 075/154] recheck --- unsloth/models/loader.py | 1 + unsloth/models/vision.py | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/unsloth/models/loader.py b/unsloth/models/loader.py index 9ab990133c..3de0943917 100644 --- a/unsloth/models/loader.py +++ b/unsloth/models/loader.py @@ -646,6 +646,7 @@ def from_pretrained( ";" # Set norms to float32 since anyways they get upcasted to float32 os.environ["UNSLOTH_HIGH_PRECISION_LAYERNORM"] = "1" + print(os.environ["UNSLOTH_FORCE_CUSTOM_DTYPE"]) else: for check_model_name in DISABLE_COMPILE_MODEL_NAMES: if check_model_name in lowered_model_name: diff --git a/unsloth/models/vision.py b/unsloth/models/vision.py index c57fd80ef5..419d760f7a 100644 --- a/unsloth/models/vision.py +++ b/unsloth/models/vision.py @@ -359,7 +359,7 @@ def from_pretrained( custom_datatype = os.environ["UNSLOTH_FORCE_CUSTOM_DTYPE"] assert custom_datatype.count(";") >= 4 checker, _dtype, _bnb_compute_dtype, _custom_datatype, execute_code = custom_datatype.split(";", 4) - + print(checker, _dtype, _bnb_compute_dtype, _custom_datatype, execute_code) # Allow custom dtypes on all runs allow_all_runs = (checker == "all") # Allow only on float16 datatypes From d0f97a9a0f295fbe08f3c6b4401b34bcea125ac1 Mon Sep 17 00:00:00 2001 From: Daniel Han Date: Wed, 20 Aug 2025 19:21:21 -0700 Subject: [PATCH 076/154] Float16 --- unsloth/models/loader.py | 5 ++--- unsloth/models/vision.py | 5 ++++- 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/unsloth/models/loader.py b/unsloth/models/loader.py index 3de0943917..a7d3da17bd 100644 --- a/unsloth/models/loader.py +++ b/unsloth/models/loader.py @@ -601,7 +601,7 @@ def from_pretrained( raise RuntimeError("Unsloth: Gemma 3N only works on transformers >= 4.53.0" + LATEST) os.environ["UNSLOTH_DISABLE_STATIC_GENERATION"] = "1" os.environ["UNSLOTH_FORCE_CUSTOM_DTYPE"] = \ - "float16;torch.float16;torch.float16;"\ + "torch.float16;torch.float16;torch.float16;"\ "if name.endswith('norm'): "\ "module._pre_set_compute_dtype = torch.float32\n"\ ";"\ @@ -612,7 +612,7 @@ def from_pretrained( # Falcon must use float32 Triton ie TRITON_F32_DEFAULT = 'ieee' # since Mamba kernels error out on using lower precision os.environ["UNSLOTH_FORCE_CUSTOM_DTYPE"] = \ - "float16;torch.float32;torch.float16;"\ + "torch.float16;torch.float32;torch.float16;"\ "if name.endswith(('q_proj', 'k_proj', 'v_proj', 'o_proj', 'gate_proj', 'up_proj', 'down_proj', 'head')): module.to(torch.float16)"\ ";"\ "os.environ['TRITON_F32_DEFAULT'] = 'ieee'" @@ -646,7 +646,6 @@ def from_pretrained( ";" # Set norms to float32 since anyways they get upcasted to float32 os.environ["UNSLOTH_HIGH_PRECISION_LAYERNORM"] = "1" - print(os.environ["UNSLOTH_FORCE_CUSTOM_DTYPE"]) else: for check_model_name in DISABLE_COMPILE_MODEL_NAMES: if check_model_name in lowered_model_name: diff --git a/unsloth/models/vision.py b/unsloth/models/vision.py index 419d760f7a..12ec00c3bd 100644 --- a/unsloth/models/vision.py +++ b/unsloth/models/vision.py @@ -363,7 +363,10 @@ def from_pretrained( # Allow custom dtypes on all runs allow_all_runs = (checker == "all") # Allow only on float16 datatypes - allow_float16_runs = (checker == "float16" and dtype == torch.float16) + allow_float16_runs = ( + (checker == "float16" or checker == "torch.float16") and \ + (dtype == torch.float16) + ) if allow_all_runs or allow_float16_runs: if eval(_dtype) is not None: From d83767f321203359cd31a096b502b6d81181fe77 Mon Sep 17 00:00:00 2001 From: Daniel Han Date: Wed, 20 Aug 2025 19:24:26 -0700 Subject: [PATCH 077/154] Update vision.py --- unsloth/models/vision.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/unsloth/models/vision.py b/unsloth/models/vision.py index 12ec00c3bd..705647cb28 100644 --- a/unsloth/models/vision.py +++ b/unsloth/models/vision.py @@ -359,7 +359,6 @@ def from_pretrained( custom_datatype = os.environ["UNSLOTH_FORCE_CUSTOM_DTYPE"] assert custom_datatype.count(";") >= 4 checker, _dtype, _bnb_compute_dtype, _custom_datatype, execute_code = custom_datatype.split(";", 4) - print(checker, _dtype, _bnb_compute_dtype, _custom_datatype, execute_code) # Allow custom dtypes on all runs allow_all_runs = (checker == "all") # Allow only on float16 datatypes @@ -367,6 +366,7 @@ def from_pretrained( (checker == "float16" or checker == "torch.float16") and \ (dtype == torch.float16) ) + print([checker], [_dtype], [_bnb_compute_dtype], [_custom_datatype], [execute_code] ) if allow_all_runs or allow_float16_runs: if eval(_dtype) is not None: @@ -387,7 +387,7 @@ def from_pretrained( if not ("attn_implementation" in kwargs): kwargs["attn_implementation"] = "sdpa" if not supports_sdpa: - print(f"Unsloth: {model_type_arch.title()} does not support SDPA - switching to eager!") + print(f"Unsloth: {model_type_arch.title()} does not support SDPA - switching to fast eager.") del kwargs["attn_implementation"] pass From 5b575d87ef24302cb434743868836bcd95acc2f2 Mon Sep 17 00:00:00 2001 From: Daniel Han Date: Wed, 20 Aug 2025 19:27:58 -0700 Subject: [PATCH 078/154] Update vision.py --- unsloth/models/vision.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/unsloth/models/vision.py b/unsloth/models/vision.py index 705647cb28..44f62d850d 100644 --- a/unsloth/models/vision.py +++ b/unsloth/models/vision.py @@ -366,7 +366,7 @@ def from_pretrained( (checker == "float16" or checker == "torch.float16") and \ (dtype == torch.float16) ) - print([checker], [_dtype], [_bnb_compute_dtype], [_custom_datatype], [execute_code] ) + print([allow_float16_runs], [checker], [_dtype], [_bnb_compute_dtype], [_custom_datatype], [execute_code] ) if allow_all_runs or allow_float16_runs: if eval(_dtype) is not None: From 66eee4deea47e76281497aeabc0be1a215ab9f39 Mon Sep 17 00:00:00 2001 From: Daniel Han Date: Wed, 20 Aug 2025 19:29:05 -0700 Subject: [PATCH 079/154] Update vision.py --- unsloth/models/vision.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/unsloth/models/vision.py b/unsloth/models/vision.py index 44f62d850d..3ce03e6da7 100644 --- a/unsloth/models/vision.py +++ b/unsloth/models/vision.py @@ -366,7 +366,7 @@ def from_pretrained( (checker == "float16" or checker == "torch.float16") and \ (dtype == torch.float16) ) - print([allow_float16_runs], [checker], [_dtype], [_bnb_compute_dtype], [_custom_datatype], [execute_code] ) + print([(checker == "float16" or checker == "torch.float16")], [dtype], [allow_float16_runs], [checker], [_dtype], [_bnb_compute_dtype], [_custom_datatype], [execute_code] ) if allow_all_runs or allow_float16_runs: if eval(_dtype) is not None: From 27d044e47840785f40a195aa7ee77dcab1149046 Mon Sep 17 00:00:00 2001 From: Daniel Han Date: Wed, 20 Aug 2025 20:38:44 -0700 Subject: [PATCH 080/154] Update vision.py --- unsloth/models/vision.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/unsloth/models/vision.py b/unsloth/models/vision.py index 3ce03e6da7..e125824c63 100644 --- a/unsloth/models/vision.py +++ b/unsloth/models/vision.py @@ -364,10 +364,8 @@ def from_pretrained( # Allow only on float16 datatypes allow_float16_runs = ( (checker == "float16" or checker == "torch.float16") and \ - (dtype == torch.float16) + (dtype == torch.float16 or os.environ.get("UNSLOTH_FORCE_FLOAT32", "0") == "1") ) - print([(checker == "float16" or checker == "torch.float16")], [dtype], [allow_float16_runs], [checker], [_dtype], [_bnb_compute_dtype], [_custom_datatype], [execute_code] ) - if allow_all_runs or allow_float16_runs: if eval(_dtype) is not None: dtype = eval(_dtype) From 34d07d89463c21cbb33275ccffaf044e3d7df243 Mon Sep 17 00:00:00 2001 From: Daniel Han Date: Wed, 20 Aug 2025 20:42:24 -0700 Subject: [PATCH 081/154] Update vision.py --- unsloth/models/vision.py | 4 ---- 1 file changed, 4 deletions(-) diff --git a/unsloth/models/vision.py b/unsloth/models/vision.py index e125824c63..23e2bb088a 100644 --- a/unsloth/models/vision.py +++ b/unsloth/models/vision.py @@ -464,7 +464,6 @@ def from_pretrained( module._pre_set_compute_dtype = torch.float32 pass # Edit data-types - print("custom_datatype", custom_datatype) if custom_datatype is not None: with torch.no_grad(): for jj, (name, module) in enumerate(model.named_modules()): @@ -524,9 +523,6 @@ def from_pretrained( ) model, tokenizer = patch_tokenizer(model, tokenizer) model = post_patch_loss_function(model) - global partial_model - partial_model = model - raise # Log Unsloth version for future fastpaths for inference if hasattr(model, "config"): From 3ad756145f638cfaa2f15a21f24d4b97d58d4ad1 Mon Sep 17 00:00:00 2001 From: Daniel Han Date: Wed, 20 Aug 2025 21:29:44 -0700 Subject: [PATCH 082/154] Update loader.py --- unsloth/models/loader.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/unsloth/models/loader.py b/unsloth/models/loader.py index a7d3da17bd..0156e2f059 100644 --- a/unsloth/models/loader.py +++ b/unsloth/models/loader.py @@ -638,11 +638,9 @@ def from_pretrained( os.environ["UNSLOTH_FORCE_CUSTOM_DTYPE"] = \ "torch.float16;torch.bfloat16;torch.float16;"\ "if ('down_projs' in name) and hasattr(module, 'weight') and "\ - "torch.amax(dequantize_module_weight(module)) >= 0:"\ + "torch.amax(dequantize_module_weight(module)) >= 1024:"\ "module._pre_set_compute_dtype = torch.float32\n"\ "\n"\ - "if ('mlp.router' in name) and hasattr(module, 'weight'):"\ - "module._pre_set_compute_dtype = torch.float32\n"\ ";" # Set norms to float32 since anyways they get upcasted to float32 os.environ["UNSLOTH_HIGH_PRECISION_LAYERNORM"] = "1" From b75729795a21149ff23f513469f603f21ddf7a0b Mon Sep 17 00:00:00 2001 From: Daniel Han Date: Wed, 20 Aug 2025 21:31:05 -0700 Subject: [PATCH 083/154] Update loader.py --- unsloth/models/loader.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/unsloth/models/loader.py b/unsloth/models/loader.py index 0156e2f059..14baa60d66 100644 --- a/unsloth/models/loader.py +++ b/unsloth/models/loader.py @@ -638,7 +638,7 @@ def from_pretrained( os.environ["UNSLOTH_FORCE_CUSTOM_DTYPE"] = \ "torch.float16;torch.bfloat16;torch.float16;"\ "if ('down_projs' in name) and hasattr(module, 'weight') and "\ - "torch.amax(dequantize_module_weight(module)) >= 1024:"\ + "torch.amax(dequantize_module_weight(module)) >= 102400:"\ "module._pre_set_compute_dtype = torch.float32\n"\ "\n"\ ";" From ceeca866ae8cb9774a830d3fba84c9238c281d77 Mon Sep 17 00:00:00 2001 From: Daniel Han Date: Wed, 20 Aug 2025 21:44:30 -0700 Subject: [PATCH 084/154] Update loader.py --- unsloth/models/loader.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/unsloth/models/loader.py b/unsloth/models/loader.py index 14baa60d66..4e0365ce1e 100644 --- a/unsloth/models/loader.py +++ b/unsloth/models/loader.py @@ -638,7 +638,7 @@ def from_pretrained( os.environ["UNSLOTH_FORCE_CUSTOM_DTYPE"] = \ "torch.float16;torch.bfloat16;torch.float16;"\ "if ('down_projs' in name) and hasattr(module, 'weight') and "\ - "torch.amax(dequantize_module_weight(module)) >= 102400:"\ + "torch.amax(dequantize_module_weight(module)) >= 512:"\ "module._pre_set_compute_dtype = torch.float32\n"\ "\n"\ ";" From 87758b98edf6cc2aa8addbd19cfba4678fa3cc2c Mon Sep 17 00:00:00 2001 From: Daniel Han Date: Wed, 20 Aug 2025 21:51:36 -0700 Subject: [PATCH 085/154] Update loader.py --- unsloth/models/loader.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/unsloth/models/loader.py b/unsloth/models/loader.py index 4e0365ce1e..85696859ae 100644 --- a/unsloth/models/loader.py +++ b/unsloth/models/loader.py @@ -638,9 +638,11 @@ def from_pretrained( os.environ["UNSLOTH_FORCE_CUSTOM_DTYPE"] = \ "torch.float16;torch.bfloat16;torch.float16;"\ "if ('down_projs' in name) and hasattr(module, 'weight') and "\ - "torch.amax(dequantize_module_weight(module)) >= 512:"\ + "torch.amax(dequantize_module_weight(module)) >= 256:"\ "module._pre_set_compute_dtype = torch.float32\n"\ "\n"\ + "if ('mlp.router' in name) and hasattr(module, 'weight'):"\ + "module._pre_set_compute_dtype = torch.float32\n"\ ";" # Set norms to float32 since anyways they get upcasted to float32 os.environ["UNSLOTH_HIGH_PRECISION_LAYERNORM"] = "1" From 97d34d48536b35c0d2fd7d60995c099aea8a6d83 Mon Sep 17 00:00:00 2001 From: Daniel Han Date: Thu, 21 Aug 2025 00:23:27 -0700 Subject: [PATCH 086/154] Update loader.py --- unsloth/models/loader.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/unsloth/models/loader.py b/unsloth/models/loader.py index 85696859ae..4e0365ce1e 100644 --- a/unsloth/models/loader.py +++ b/unsloth/models/loader.py @@ -638,11 +638,9 @@ def from_pretrained( os.environ["UNSLOTH_FORCE_CUSTOM_DTYPE"] = \ "torch.float16;torch.bfloat16;torch.float16;"\ "if ('down_projs' in name) and hasattr(module, 'weight') and "\ - "torch.amax(dequantize_module_weight(module)) >= 256:"\ + "torch.amax(dequantize_module_weight(module)) >= 512:"\ "module._pre_set_compute_dtype = torch.float32\n"\ "\n"\ - "if ('mlp.router' in name) and hasattr(module, 'weight'):"\ - "module._pre_set_compute_dtype = torch.float32\n"\ ";" # Set norms to float32 since anyways they get upcasted to float32 os.environ["UNSLOTH_HIGH_PRECISION_LAYERNORM"] = "1" From 43bf41f9df86e3bb2bf40e4db8957e0418fbc5e6 Mon Sep 17 00:00:00 2001 From: Daniel Han Date: Thu, 21 Aug 2025 00:24:39 -0700 Subject: [PATCH 087/154] Update loader.py --- unsloth/models/loader.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/unsloth/models/loader.py b/unsloth/models/loader.py index 4e0365ce1e..94a07bf06a 100644 --- a/unsloth/models/loader.py +++ b/unsloth/models/loader.py @@ -638,7 +638,7 @@ def from_pretrained( os.environ["UNSLOTH_FORCE_CUSTOM_DTYPE"] = \ "torch.float16;torch.bfloat16;torch.float16;"\ "if ('down_projs' in name) and hasattr(module, 'weight') and "\ - "torch.amax(dequantize_module_weight(module)) >= 512:"\ + "torch.amax(dequantize_module_weight(module)) >= 256:"\ "module._pre_set_compute_dtype = torch.float32\n"\ "\n"\ ";" From 6e7ad5259d13c959cb08ee81a97547425144d639 Mon Sep 17 00:00:00 2001 From: Daniel Han Date: Thu, 21 Aug 2025 00:26:49 -0700 Subject: [PATCH 088/154] Update loader.py --- unsloth/models/loader.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/unsloth/models/loader.py b/unsloth/models/loader.py index 94a07bf06a..c9c1e05553 100644 --- a/unsloth/models/loader.py +++ b/unsloth/models/loader.py @@ -638,7 +638,7 @@ def from_pretrained( os.environ["UNSLOTH_FORCE_CUSTOM_DTYPE"] = \ "torch.float16;torch.bfloat16;torch.float16;"\ "if ('down_projs' in name) and hasattr(module, 'weight') and "\ - "torch.amax(dequantize_module_weight(module)) >= 256:"\ + "torch.amax(dequantize_module_weight(module)) >= 128:"\ "module._pre_set_compute_dtype = torch.float32\n"\ "\n"\ ";" From d605aa7311bffa8e80ae6ec3e6f34716d209e140 Mon Sep 17 00:00:00 2001 From: Daniel Han Date: Thu, 21 Aug 2025 00:35:38 -0700 Subject: [PATCH 089/154] Update loader.py --- unsloth/models/loader.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/unsloth/models/loader.py b/unsloth/models/loader.py index c9c1e05553..6ec045eb36 100644 --- a/unsloth/models/loader.py +++ b/unsloth/models/loader.py @@ -638,7 +638,7 @@ def from_pretrained( os.environ["UNSLOTH_FORCE_CUSTOM_DTYPE"] = \ "torch.float16;torch.bfloat16;torch.float16;"\ "if ('down_projs' in name) and hasattr(module, 'weight') and "\ - "torch.amax(dequantize_module_weight(module)) >= 128:"\ + "torch.amax(dequantize_module_weight(module)) >= 0:"\ "module._pre_set_compute_dtype = torch.float32\n"\ "\n"\ ";" From f417dc882969acfd9e11a4a3d0ed7b548371aa2e Mon Sep 17 00:00:00 2001 From: Daniel Han Date: Thu, 21 Aug 2025 00:51:06 -0700 Subject: [PATCH 090/154] Update loader.py --- unsloth/models/loader.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/unsloth/models/loader.py b/unsloth/models/loader.py index 6ec045eb36..a7d3da17bd 100644 --- a/unsloth/models/loader.py +++ b/unsloth/models/loader.py @@ -641,6 +641,8 @@ def from_pretrained( "torch.amax(dequantize_module_weight(module)) >= 0:"\ "module._pre_set_compute_dtype = torch.float32\n"\ "\n"\ + "if ('mlp.router' in name) and hasattr(module, 'weight'):"\ + "module._pre_set_compute_dtype = torch.float32\n"\ ";" # Set norms to float32 since anyways they get upcasted to float32 os.environ["UNSLOTH_HIGH_PRECISION_LAYERNORM"] = "1" From 05fe3d1fd7d6f202a4f8b50262d5d00127eb72e2 Mon Sep 17 00:00:00 2001 From: Daniel Han Date: Thu, 21 Aug 2025 01:21:10 -0700 Subject: [PATCH 091/154] Update loader.py --- unsloth/models/loader.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/unsloth/models/loader.py b/unsloth/models/loader.py index a7d3da17bd..28bb896760 100644 --- a/unsloth/models/loader.py +++ b/unsloth/models/loader.py @@ -637,7 +637,7 @@ def from_pretrained( # Set norms to float32 since anyways they get upcasted to float32 os.environ["UNSLOTH_FORCE_CUSTOM_DTYPE"] = \ "torch.float16;torch.bfloat16;torch.float16;"\ - "if ('down_projs' in name) and hasattr(module, 'weight') and "\ + "if ('down_projs' in name or 'gate_up_proj' in name) and hasattr(module, 'weight') and "\ "torch.amax(dequantize_module_weight(module)) >= 0:"\ "module._pre_set_compute_dtype = torch.float32\n"\ "\n"\ From a79d6f6ac880e17b6079b1ba7981b130615a19dc Mon Sep 17 00:00:00 2001 From: Daniel Han Date: Thu, 21 Aug 2025 01:54:48 -0700 Subject: [PATCH 092/154] Update loader.py --- unsloth/models/loader.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/unsloth/models/loader.py b/unsloth/models/loader.py index 28bb896760..a7d3da17bd 100644 --- a/unsloth/models/loader.py +++ b/unsloth/models/loader.py @@ -637,7 +637,7 @@ def from_pretrained( # Set norms to float32 since anyways they get upcasted to float32 os.environ["UNSLOTH_FORCE_CUSTOM_DTYPE"] = \ "torch.float16;torch.bfloat16;torch.float16;"\ - "if ('down_projs' in name or 'gate_up_proj' in name) and hasattr(module, 'weight') and "\ + "if ('down_projs' in name) and hasattr(module, 'weight') and "\ "torch.amax(dequantize_module_weight(module)) >= 0:"\ "module._pre_set_compute_dtype = torch.float32\n"\ "\n"\ From 59702c494078128468015ccd003761e83ca2451a Mon Sep 17 00:00:00 2001 From: Daniel Han Date: Thu, 21 Aug 2025 02:09:23 -0700 Subject: [PATCH 093/154] Update loader.py --- unsloth/models/loader.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/unsloth/models/loader.py b/unsloth/models/loader.py index a7d3da17bd..b95678a499 100644 --- a/unsloth/models/loader.py +++ b/unsloth/models/loader.py @@ -643,6 +643,10 @@ def from_pretrained( "\n"\ "if ('mlp.router' in name) and hasattr(module, 'weight'):"\ "module._pre_set_compute_dtype = torch.float32\n"\ + "\n"\ + "if ('self_attn' in name) and hasattr(module, 'sinks'):"\ + "module.sinks._pre_set_compute_dtype = torch.float32\n"\ + "\n"\ ";" # Set norms to float32 since anyways they get upcasted to float32 os.environ["UNSLOTH_HIGH_PRECISION_LAYERNORM"] = "1" From 1b66aee7b2f395ba51e1a2e69219f2c08701a95c Mon Sep 17 00:00:00 2001 From: Daniel Han Date: Thu, 21 Aug 2025 02:32:41 -0700 Subject: [PATCH 094/154] Update loader.py --- unsloth/models/loader.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/unsloth/models/loader.py b/unsloth/models/loader.py index b95678a499..ef39e636c2 100644 --- a/unsloth/models/loader.py +++ b/unsloth/models/loader.py @@ -637,7 +637,7 @@ def from_pretrained( # Set norms to float32 since anyways they get upcasted to float32 os.environ["UNSLOTH_FORCE_CUSTOM_DTYPE"] = \ "torch.float16;torch.bfloat16;torch.float16;"\ - "if ('down_projs' in name) and hasattr(module, 'weight') and "\ + "if ('down_projs' in name or '_proj' in name) and hasattr(module, 'weight') and "\ "torch.amax(dequantize_module_weight(module)) >= 0:"\ "module._pre_set_compute_dtype = torch.float32\n"\ "\n"\ From a71fa05c7a7a8e72547a7c054e659ce1149e088e Mon Sep 17 00:00:00 2001 From: Daniel Han Date: Thu, 21 Aug 2025 02:51:45 -0700 Subject: [PATCH 095/154] Update loader.py --- unsloth/models/loader.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/unsloth/models/loader.py b/unsloth/models/loader.py index ef39e636c2..dd0a3961e7 100644 --- a/unsloth/models/loader.py +++ b/unsloth/models/loader.py @@ -637,7 +637,7 @@ def from_pretrained( # Set norms to float32 since anyways they get upcasted to float32 os.environ["UNSLOTH_FORCE_CUSTOM_DTYPE"] = \ "torch.float16;torch.bfloat16;torch.float16;"\ - "if ('down_projs' in name or '_proj' in name) and hasattr(module, 'weight') and "\ + "if ('down_projs') and hasattr(module, 'weight') and "\ "torch.amax(dequantize_module_weight(module)) >= 0:"\ "module._pre_set_compute_dtype = torch.float32\n"\ "\n"\ @@ -647,6 +647,9 @@ def from_pretrained( "if ('self_attn' in name) and hasattr(module, 'sinks'):"\ "module.sinks._pre_set_compute_dtype = torch.float32\n"\ "\n"\ + "if ('embed_tokens' in name):"\ + "module.sinks._pre_set_compute_dtype = torch.float32\n"\ + "\n"\ ";" # Set norms to float32 since anyways they get upcasted to float32 os.environ["UNSLOTH_HIGH_PRECISION_LAYERNORM"] = "1" From d3e8625b1de6703165535f985d54ebf621eec1ae Mon Sep 17 00:00:00 2001 From: Daniel Han Date: Thu, 21 Aug 2025 02:53:29 -0700 Subject: [PATCH 096/154] Update loader.py --- unsloth/models/loader.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/unsloth/models/loader.py b/unsloth/models/loader.py index dd0a3961e7..1c64ae4cfc 100644 --- a/unsloth/models/loader.py +++ b/unsloth/models/loader.py @@ -647,8 +647,8 @@ def from_pretrained( "if ('self_attn' in name) and hasattr(module, 'sinks'):"\ "module.sinks._pre_set_compute_dtype = torch.float32\n"\ "\n"\ - "if ('embed_tokens' in name):"\ - "module.sinks._pre_set_compute_dtype = torch.float32\n"\ + "if ('embed_tokens' in name) and hasattr(module, 'weight'):"\ + "module._pre_set_compute_dtype = torch.float32\n"\ "\n"\ ";" # Set norms to float32 since anyways they get upcasted to float32 From fb112cf3c6b48df1afcf51827f775ce1fee951eb Mon Sep 17 00:00:00 2001 From: Daniel Han Date: Thu, 21 Aug 2025 03:09:03 -0700 Subject: [PATCH 097/154] Update loader.py --- unsloth/models/loader.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/unsloth/models/loader.py b/unsloth/models/loader.py index 1c64ae4cfc..e8c410ebd1 100644 --- a/unsloth/models/loader.py +++ b/unsloth/models/loader.py @@ -647,7 +647,7 @@ def from_pretrained( "if ('self_attn' in name) and hasattr(module, 'sinks'):"\ "module.sinks._pre_set_compute_dtype = torch.float32\n"\ "\n"\ - "if ('embed_tokens' in name) and hasattr(module, 'weight'):"\ + "if ('embed_tokens' in name or 'lm_head' in name) and hasattr(module, 'weight'):"\ "module._pre_set_compute_dtype = torch.float32\n"\ "\n"\ ";" From 5dbdcc565dd6dc8fa5edc2bf4314ad326ffef18c Mon Sep 17 00:00:00 2001 From: Daniel Han Date: Thu, 21 Aug 2025 03:29:27 -0700 Subject: [PATCH 098/154] Update loader.py --- unsloth/models/loader.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/unsloth/models/loader.py b/unsloth/models/loader.py index e8c410ebd1..c9e0646af7 100644 --- a/unsloth/models/loader.py +++ b/unsloth/models/loader.py @@ -648,7 +648,7 @@ def from_pretrained( "module.sinks._pre_set_compute_dtype = torch.float32\n"\ "\n"\ "if ('embed_tokens' in name or 'lm_head' in name) and hasattr(module, 'weight'):"\ - "module._pre_set_compute_dtype = torch.float32\n"\ + "module._pre_set_compute_dtype = torch.bfloat16\n"\ "\n"\ ";" # Set norms to float32 since anyways they get upcasted to float32 From fdaa0074093bfffd626632bf8153d52eb7c30a4e Mon Sep 17 00:00:00 2001 From: Daniel Han Date: Thu, 21 Aug 2025 04:02:33 -0700 Subject: [PATCH 099/154] Update loader.py --- unsloth/models/loader.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/unsloth/models/loader.py b/unsloth/models/loader.py index c9e0646af7..71459599a5 100644 --- a/unsloth/models/loader.py +++ b/unsloth/models/loader.py @@ -639,13 +639,13 @@ def from_pretrained( "torch.float16;torch.bfloat16;torch.float16;"\ "if ('down_projs') and hasattr(module, 'weight') and "\ "torch.amax(dequantize_module_weight(module)) >= 0:"\ - "module._pre_set_compute_dtype = torch.float32\n"\ + "module._pre_set_compute_dtype = torch.bfloat16\n"\ "\n"\ "if ('mlp.router' in name) and hasattr(module, 'weight'):"\ - "module._pre_set_compute_dtype = torch.float32\n"\ + "module._pre_set_compute_dtype = torch.bfloat16\n"\ "\n"\ "if ('self_attn' in name) and hasattr(module, 'sinks'):"\ - "module.sinks._pre_set_compute_dtype = torch.float32\n"\ + "module.sinks._pre_set_compute_dtype = torch.bfloat16\n"\ "\n"\ "if ('embed_tokens' in name or 'lm_head' in name) and hasattr(module, 'weight'):"\ "module._pre_set_compute_dtype = torch.bfloat16\n"\ From ba0eb04d9076811da446e8a7d46717ac91fd2ada Mon Sep 17 00:00:00 2001 From: Daniel Han Date: Thu, 21 Aug 2025 04:19:00 -0700 Subject: [PATCH 100/154] Bug fix --- unsloth/models/loader.py | 2 +- unsloth/models/vision.py | 3 ++- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/unsloth/models/loader.py b/unsloth/models/loader.py index 71459599a5..7b8320c65a 100644 --- a/unsloth/models/loader.py +++ b/unsloth/models/loader.py @@ -637,7 +637,7 @@ def from_pretrained( # Set norms to float32 since anyways they get upcasted to float32 os.environ["UNSLOTH_FORCE_CUSTOM_DTYPE"] = \ "torch.float16;torch.bfloat16;torch.float16;"\ - "if ('down_projs') and hasattr(module, 'weight') and "\ + "if ('_proj' in name) and hasattr(module, 'weight') and "\ "torch.amax(dequantize_module_weight(module)) >= 0:"\ "module._pre_set_compute_dtype = torch.bfloat16\n"\ "\n"\ diff --git a/unsloth/models/vision.py b/unsloth/models/vision.py index 23e2bb088a..486a049339 100644 --- a/unsloth/models/vision.py +++ b/unsloth/models/vision.py @@ -213,7 +213,8 @@ def unsloth_base_fast_generate( cache_implementation = None if cache_implementation is not None: swa = getattr(getattr(self.config, "text_config", self.config), "sliding_window", None) - if swa == 0 or type(swa) is not int: + if (swa == 0 or type(swa) is not int) \ + and (getattr(self, "_can_compile_fullgraph", True) is True): cache_implementation = "static" else: cache_implementation = "hybrid" From 3f982620a575c0117aafc572c4767d77ced7304b Mon Sep 17 00:00:00 2001 From: Daniel Han Date: Thu, 21 Aug 2025 05:47:58 -0700 Subject: [PATCH 101/154] Update loader.py --- unsloth/models/loader.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/unsloth/models/loader.py b/unsloth/models/loader.py index 7b8320c65a..f6bb23551d 100644 --- a/unsloth/models/loader.py +++ b/unsloth/models/loader.py @@ -636,7 +636,7 @@ def from_pretrained( # Set down projection compute dtype to be float32 for float16 machines # Set norms to float32 since anyways they get upcasted to float32 os.environ["UNSLOTH_FORCE_CUSTOM_DTYPE"] = \ - "torch.float16;torch.bfloat16;torch.float16;"\ + "torch.float16;torch.bfloat16;torch.bfloat16;"\ "if ('_proj' in name) and hasattr(module, 'weight') and "\ "torch.amax(dequantize_module_weight(module)) >= 0:"\ "module._pre_set_compute_dtype = torch.bfloat16\n"\ From 3e6511b84f297289bf694893b023db35fd24fc49 Mon Sep 17 00:00:00 2001 From: Daniel Han Date: Thu, 21 Aug 2025 06:37:40 -0700 Subject: [PATCH 102/154] Update loader.py --- unsloth/models/loader.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/unsloth/models/loader.py b/unsloth/models/loader.py index f6bb23551d..889d170a17 100644 --- a/unsloth/models/loader.py +++ b/unsloth/models/loader.py @@ -785,7 +785,8 @@ def from_pretrained( model_types = ["siglip"] + model_types # Set forced float32 env flag - os.environ["UNSLOTH_FORCE_FLOAT32"] = "0" + if "UNSLOTH_FORCE_FLOAT32" not in os.environ: + os.environ["UNSLOTH_FORCE_FLOAT32"] = "0" do_forced_float32 = False for model_type_arch in model_types: if model_type_arch != "siglip": break From c9e75375b31d14c66e9f8846e2793f96e9bfee71 Mon Sep 17 00:00:00 2001 From: Daniel Han Date: Thu, 21 Aug 2025 07:00:44 -0700 Subject: [PATCH 103/154] Update loader.py --- unsloth/models/loader.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/unsloth/models/loader.py b/unsloth/models/loader.py index 889d170a17..3112f674fe 100644 --- a/unsloth/models/loader.py +++ b/unsloth/models/loader.py @@ -641,6 +641,9 @@ def from_pretrained( "torch.amax(dequantize_module_weight(module)) >= 0:"\ "module._pre_set_compute_dtype = torch.bfloat16\n"\ "\n"\ + "if hasattr(module, 'weight'):"\ + "module._pre_set_compute_dtype = torch.bfloat16\n"\ + "\n"\ "if ('mlp.router' in name) and hasattr(module, 'weight'):"\ "module._pre_set_compute_dtype = torch.bfloat16\n"\ "\n"\ From 2e38e8a9b9e46b5bb4bf026dfff677728d662297 Mon Sep 17 00:00:00 2001 From: Daniel Han Date: Fri, 22 Aug 2025 03:42:08 -0700 Subject: [PATCH 104/154] Update loader.py --- unsloth/models/loader.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/unsloth/models/loader.py b/unsloth/models/loader.py index 3112f674fe..9ae1448762 100644 --- a/unsloth/models/loader.py +++ b/unsloth/models/loader.py @@ -897,6 +897,8 @@ def from_pretrained( if load_in_4bit: # Fix up bitsandbytes config + print("torch_dtype", model.config.to_dict().get("torch_dtype")) + print("dtype", model.config.to_dict().get("dtype")) quantization_config = \ { # Sometimes torch_dtype is not a string!! From 8b3a8bacf4a19133d9d4952fad7fd65d437861a8 Mon Sep 17 00:00:00 2001 From: Daniel Han Date: Fri, 22 Aug 2025 03:44:29 -0700 Subject: [PATCH 105/154] Update loader.py --- unsloth/models/loader.py | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/unsloth/models/loader.py b/unsloth/models/loader.py index 9ae1448762..1b3b2d6011 100644 --- a/unsloth/models/loader.py +++ b/unsloth/models/loader.py @@ -436,10 +436,12 @@ def from_pretrained( if load_in_4bit: # Fix up bitsandbytes config + config = model.config.to_dict() + torch_dtype = config.get("dtype") or config.get("torch_dtype") quantization_config = \ { # Sometimes torch_dtype is not a string!! - "bnb_4bit_compute_dtype" : model.config.to_dict()["torch_dtype"], + "bnb_4bit_compute_dtype" : torch_dtype, "bnb_4bit_quant_type" : "nf4", "bnb_4bit_use_double_quant" : True, "llm_int8_enable_fp32_cpu_offload" : False, @@ -897,12 +899,12 @@ def from_pretrained( if load_in_4bit: # Fix up bitsandbytes config - print("torch_dtype", model.config.to_dict().get("torch_dtype")) - print("dtype", model.config.to_dict().get("dtype")) + config = model.config.to_dict() + torch_dtype = config.get("dtype") or config.get("torch_dtype") quantization_config = \ { # Sometimes torch_dtype is not a string!! - "bnb_4bit_compute_dtype" : model.config.to_dict()["torch_dtype"], + "bnb_4bit_compute_dtype" : torch_dtype, "bnb_4bit_quant_type" : "nf4", "bnb_4bit_use_double_quant" : True, "llm_int8_enable_fp32_cpu_offload" : False, From f706d20e56924bdb26190625ebb66bac4eaa63d6 Mon Sep 17 00:00:00 2001 From: Daniel Han Date: Fri, 22 Aug 2025 03:59:09 -0700 Subject: [PATCH 106/154] torch_dtype --- unsloth/models/vision.py | 19 ++++++++++++++----- unsloth/save.py | 15 +++++++++++---- 2 files changed, 25 insertions(+), 9 deletions(-) diff --git a/unsloth/models/vision.py b/unsloth/models/vision.py index 486a049339..fc31032594 100644 --- a/unsloth/models/vision.py +++ b/unsloth/models/vision.py @@ -73,6 +73,9 @@ PROMPT_LOOPKUP = dict() from transformers import GenerationConfig, CompileConfig, HybridCache +from transformers import PretrainedConfig +HAS_TORCH_DTYPE = "torch_dtype" in PretrainedConfig.__doc__ + _compile_config = CompileConfig( fullgraph = False, dynamic = None, @@ -118,7 +121,7 @@ def unsloth_base_fast_generate( bsz = input_ids.shape[0] FastBaseModel.for_inference(self) - dtype = _get_dtype(self.config.torch_dtype) + dtype = _get_dtype(getattr(self.config, "dtype", None) or getattr(self.config, "torch_dtype", None)) # Check if VLM is_vlm = any( @@ -246,8 +249,6 @@ def unsloth_base_fast_generate( return output pass -global partial_model - class FastBaseModel: @staticmethod @@ -443,11 +444,17 @@ def from_pretrained( torch_dtype = dtype if do_forced_float32: torch_dtype = torch.bfloat16 + if HAS_TORCH_DTYPE: + kwargs["torch_dtype"] = torch_dtype + else: + # Transformers removed torch_dtype + kwargs["dtype"] = torch_dtype + raise_handler = RaiseUninitialized() model = auto_model.from_pretrained( model_name, device_map = device_map, - torch_dtype = torch_dtype, + # torch_dtype = torch_dtype, # Transformers removed torch_dtype # quantization_config = bnb_config, token = token, trust_remote_code = trust_remote_code, @@ -698,7 +705,9 @@ def post_patch_model( full_finetuning = os.environ.get("UNSLOTH_ENABLE_FULL_FINETUNING", "0") == "1" float32_mixed_precision = True - if _get_dtype(model.config.torch_dtype) == torch.bfloat16 and full_finetuning: + if _get_dtype( + getattr(model.config, "dtype", None) or getattr(model.config, "torch_dtype", None) + ) == torch.bfloat16 and full_finetuning: # Use bfloat16 precision for full finetuning float32_mixed_precision = False diff --git a/unsloth/save.py b/unsloth/save.py index 9539b66701..4535c7dc42 100644 --- a/unsloth/save.py +++ b/unsloth/save.py @@ -549,11 +549,14 @@ def unsloth_save_model( from collections import OrderedDict state_dict = OrderedDict() - torch_dtype = internal_model.config.torch_dtype + torch_dtype = \ + getattr(internal_model.config, "dtype", None) or \ + getattr(internal_model.config, "torch_dtype", None) if type(torch_dtype) is str: if torch_dtype == "float16": torch_dtype = torch.float16 elif torch_dtype == "bfloat16": torch_dtype = torch.bfloat16 - pass + else: + torch_dtype = internal_model.model.embed_tokens.weight.dtype # Check modules to save float32 dtype state_dict["model.embed_tokens.weight"] = internal_model.model.embed_tokens.weight.data.to(torch_dtype) @@ -1880,7 +1883,9 @@ def unsloth_save_pretrained_gguf( for _ in range(3): gc.collect() - model_dtype = self.config.torch_dtype + model_dtype = \ + getattr(self.config, "dtype", None) or \ + getattr(self.config, "torch_dtype", None) model_type = self.config.model_type if type(model_dtype) is str: assert(model_dtype == "float16" or model_dtype == "bfloat16") @@ -2058,7 +2063,9 @@ def unsloth_push_to_hub_gguf( for _ in range(3): gc.collect() - model_dtype = self.config.torch_dtype + model_dtype = \ + getattr(self.config, "dtype", None) or \ + getattr(self.config, "torch_dtype", None) model_type = self.config.model_type if type(model_dtype) is str: assert(model_dtype == "float16" or model_dtype == "bfloat16") From b56cc1b82cfb64a02bbe7a12afd1c05eaa4bf53d Mon Sep 17 00:00:00 2001 From: Daniel Han Date: Thu, 4 Sep 2025 03:33:54 -0700 Subject: [PATCH 107/154] Update rl.py --- unsloth/models/rl.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/unsloth/models/rl.py b/unsloth/models/rl.py index 0f1fa2dbf6..b1ab96c840 100644 --- a/unsloth/models/rl.py +++ b/unsloth/models/rl.py @@ -513,7 +513,7 @@ def _patch_trl_rl_trainers(trainer_file = "grpo_trainer"): "fp16" : False, "include_tokens_per_second" : False, "include_num_input_tokens_seen" : False, - "auto_find_batch_size" : True, # Auto /2 batch size + "auto_find_batch_size" : False, # Auto /2 batch size - too many people complained so removing "dataloader_pin_memory" : True, # Might fail so disable for now # "dataloader_persistent_workers" : True, # Keeps dataloader in RAM From c47f9367f53c0495bace2aa145252955d620aa78 Mon Sep 17 00:00:00 2001 From: Daniel Han Date: Thu, 4 Sep 2025 03:55:38 -0700 Subject: [PATCH 108/154] Fix CE Loss --- unsloth/models/llama.py | 4 ++-- unsloth/models/mistral.py | 29 +++++++++++++++++++++-------- 2 files changed, 23 insertions(+), 10 deletions(-) diff --git a/unsloth/models/llama.py b/unsloth/models/llama.py index cf2ca75f75..f978060c9c 100644 --- a/unsloth/models/llama.py +++ b/unsloth/models/llama.py @@ -1236,7 +1236,7 @@ def _CausalLM_fast_forward( # < 1024 Normal Unsloth uses less VRAM! if bsz*q_len <= 1024: RETURN_LOGITS = True - if not RETURN_LOGITS and HAS_CUT_CROSS_ENTROPY and labels is not None: + if not RETURN_LOGITS and labels is not None: n_items = kwargs.get("num_items_in_batch", None) or kwargs.get("n_items", None) @@ -1259,7 +1259,7 @@ def _CausalLM_fast_forward( mask = None, n_items = n_items, scaling = getattr(self, "accelerator_scaler", None), - target_gb = 1, + target_gb = None, torch_compile = True, logit_softcapping = logit_softcapping, ) diff --git a/unsloth/models/mistral.py b/unsloth/models/mistral.py index 6274f2e5df..faab2d30b1 100644 --- a/unsloth/models/mistral.py +++ b/unsloth/models/mistral.py @@ -300,17 +300,30 @@ def MistralForCausalLM_fast_forward( # < 1024 Normal Unsloth uses less VRAM! if bsz * q_len <= 1024: RETURN_LOGITS = True - if not RETURN_LOGITS and HAS_CUT_CROSS_ENTROPY and os.environ.get("UNSLOTH_ENABLE_CCE", "1") != "0" and labels is not None: + if not RETURN_LOGITS and labels is not None: n_items = kwargs.get("num_items_in_batch", None) or kwargs.get("n_items", None) logit_softcapping = getattr(self.config, "final_logit_softcapping", 0) - loss = fused_linear_cross_entropy( - hidden_states = hidden_states, - lm_weight = lm_head, - labels = labels, - num_items_in_batch = n_items, - logit_softcapping = logit_softcapping, - ) + # loss = fused_linear_cross_entropy( + # hidden_states = hidden_states, + # lm_weight = lm_head, + # labels = labels, + # num_items_in_batch = n_items, + # logit_softcapping = logit_softcapping, + # ) + loss = unsloth_fused_ce_loss( + trainer = None, + hidden_states = hidden_states, + lm_head_weight = lm_head, + lm_head_bias = None, + labels = labels, + mask = None, + n_items = n_items, + scaling = getattr(self, "accelerator_scaler", None), + target_gb = None, + torch_compile = True, + logit_softcapping = logit_softcapping, + ) if not return_dict: output = (logits,) + outputs[1:] return (loss,) + output if loss is not None else output From 0b896c5f93e10a24b6db32d96627bb4482ff7558 Mon Sep 17 00:00:00 2001 From: Daniel Han Date: Thu, 4 Sep 2025 05:11:33 -0700 Subject: [PATCH 109/154] Versioning --- pyproject.toml | 4 ++-- unsloth/__init__.py | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 8c60cb5866..160182c2a2 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -37,7 +37,7 @@ triton = [ ] huggingface = [ - "unsloth_zoo>=2025.8.9", + "unsloth_zoo>=2025.9.1", "packaging", "tyro", "transformers>=4.51.3,!=4.52.0,!=4.52.1,!=4.52.2,!=4.52.3,!=4.53.0,!=4.54.0,!=4.55.0,!=4.55.1", @@ -453,7 +453,7 @@ colab-ampere-torch220 = [ "flash-attn>=2.6.3", ] colab-new = [ - "unsloth_zoo>=2025.8.9", + "unsloth_zoo>=2025.9.1", "packaging", "tyro", "transformers>=4.51.3,!=4.47.0,!=4.52.0,!=4.52.1,!=4.52.2,!=4.52.3,!=4.53.0,!=4.54.0,!=4.55.0,!=4.55.1", diff --git a/unsloth/__init__.py b/unsloth/__init__.py index 1b2a9310ff..25a54165b7 100644 --- a/unsloth/__init__.py +++ b/unsloth/__init__.py @@ -214,7 +214,7 @@ def is_bf16_supported(): return SUPPORTS_BFLOAT16 # Check for unsloth_zoo try: unsloth_zoo_version = importlib_version("unsloth_zoo") - if Version(unsloth_zoo_version) < Version("2025.8.8"): + if Version(unsloth_zoo_version) < Version("2025.9.1"): print( "Unsloth: Please update Unsloth and Unsloth-Zoo to the latest version!\n"\ "Do this via `pip install --upgrade --force-reinstall --no-cache-dir --no-deps unsloth unsloth_zoo`" From 7234a62f5b40d2ee96e65570a8e7a769e5449271 Mon Sep 17 00:00:00 2001 From: Daniel Han Date: Tue, 9 Sep 2025 01:59:13 -0700 Subject: [PATCH 110/154] Update loader.py --- unsloth/models/loader.py | 1 + 1 file changed, 1 insertion(+) diff --git a/unsloth/models/loader.py b/unsloth/models/loader.py index b1844a1472..952f900ff4 100644 --- a/unsloth/models/loader.py +++ b/unsloth/models/loader.py @@ -527,6 +527,7 @@ def from_pretrained( qat_scheme = None, *args, **kwargs, ): + print("model_name", model_name) if token is None: token = get_token() # Login to allow private models if token is not None: From 68c1aba08999d4f8801cda2194bcab5234109f31 Mon Sep 17 00:00:00 2001 From: Daniel Han Date: Tue, 9 Sep 2025 02:01:49 -0700 Subject: [PATCH 111/154] Update loader.py --- unsloth/models/loader.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/unsloth/models/loader.py b/unsloth/models/loader.py index 952f900ff4..b689b1f3c2 100644 --- a/unsloth/models/loader.py +++ b/unsloth/models/loader.py @@ -254,7 +254,9 @@ def from_pretrained( # Get base model for PEFT: if is_peft: # Check base model again for PEFT + print("is_peft", model_name) model_name = peft_config.base_model_name_or_path + print("is_peft", model_name) if not use_exact_model_name: model_name = get_model_name(model_name, load_in_4bit) model_config = AutoConfig.from_pretrained( From 05fc2f2628b54ee2e867ff5c307abcfda7310cce Mon Sep 17 00:00:00 2001 From: Daniel Han Date: Tue, 9 Sep 2025 04:31:12 -0700 Subject: [PATCH 112/154] extract_model_type_from_config --- unsloth/models/_utils.py | 33 ++++++++++++++++++++++++++++++++- unsloth/models/loader.py | 4 +--- 2 files changed, 33 insertions(+), 4 deletions(-) diff --git a/unsloth/models/_utils.py b/unsloth/models/_utils.py index 597ed0244b..0346ba13c1 100644 --- a/unsloth/models/_utils.py +++ b/unsloth/models/_utils.py @@ -12,12 +12,13 @@ # See the License for the specific language governing permissions and # limitations under the License. -__version__ = "2025.9.2" +__version__ = "2025.9.3" __all__ = [ "SUPPORTS_BFLOAT16", "is_bfloat16_supported", "is_vLLM_available", + "extract_model_type_from_config", "prepare_model_for_kbit_training", "xformers", @@ -1561,3 +1562,33 @@ def _prepare_model_for_qat(model: torch.nn.Module, qat_scheme: str) -> torch.nn. quantize_(model, QATConfig(base_config, step="prepare"), filter_fn=filter_fn) return model pass + + +def extract_model_type_from_config(config): + """ Gets model_type from config file - can be PEFT or normal HF """ + model_type = None + from peft import PeftConfig + if issubclass(type(config), PeftConfig): + model_type_list = re.finditer(r"transformers\.models\.([^\.]{2,})\.modeling_\1", str(config)) + model_type_list = list(model_type_list) + # Use transformers.models.gpt_oss.modeling_gpt_oss + if len(model_type_list) != 0: + model_type = model_type_list[0].group(1) + elif hasattr(config, "auto_mapping"): + # Use GptOssForCausalLM + model_type = config.auto_mapping.get("base_model_class", None) + if model_type is None: + # Last resort use model name unsloth/gpt-oss-20b-unsloth-bnb-4bit + model_type = config.base_model_name_or_path + model_type = os.path.split(model_type)[-1] + else: + + if model_type is None: + raise TypeError(f"Unsloth: Cannot determine model type for config file: {str(config)}") + + # Standardize model_type + model_type = model_type.lower() + model_type = model_type.replace("_", "-") + model_type = model_type.replace("/", "-") + return model_type +pass diff --git a/unsloth/models/loader.py b/unsloth/models/loader.py index c0b996ae02..9c26c8834e 100644 --- a/unsloth/models/loader.py +++ b/unsloth/models/loader.py @@ -20,6 +20,7 @@ HAS_FLASH_ATTENTION_SOFTCAPPING, USE_MODELSCOPE, get_transformers_model_type, + extract_model_type_from_config, ) from .granite import FastGraniteModel from .llama import FastLlamaModel, logger @@ -254,9 +255,7 @@ def from_pretrained( # Get base model for PEFT: if is_peft: # Check base model again for PEFT - print("is_peft", model_name) model_name = peft_config.base_model_name_or_path - print("is_peft", model_name) if not use_exact_model_name: model_name = get_model_name(model_name, load_in_4bit) model_config = AutoConfig.from_pretrained( @@ -529,7 +528,6 @@ def from_pretrained( qat_scheme = None, *args, **kwargs, ): - print("model_name", model_name) if token is None: token = get_token() # Login to allow private models if token is not None: From 99c7afb3fcc8aaa755dba2ad9f74140ff978028c Mon Sep 17 00:00:00 2001 From: Daniel Han Date: Tue, 9 Sep 2025 21:51:46 -0700 Subject: [PATCH 113/154] Model types --- unsloth/models/_utils.py | 39 +++++++++++--- unsloth/models/loader.py | 114 +++++++++++++++++++++------------------ 2 files changed, 92 insertions(+), 61 deletions(-) diff --git a/unsloth/models/_utils.py b/unsloth/models/_utils.py index 0346ba13c1..f961a49de5 100644 --- a/unsloth/models/_utils.py +++ b/unsloth/models/_utils.py @@ -1566,7 +1566,9 @@ def _prepare_model_for_qat(model: torch.nn.Module, qat_scheme: str) -> torch.nn. def extract_model_type_from_config(config): """ Gets model_type from config file - can be PEFT or normal HF """ - model_type = None + if config is None: + raise TypeError(f"Unsloth: Cannot determine model type for config file: {str(config)}") + model_types = None from peft import PeftConfig if issubclass(type(config), PeftConfig): model_type_list = re.finditer(r"transformers\.models\.([^\.]{2,})\.modeling_\1", str(config)) @@ -1574,6 +1576,7 @@ def extract_model_type_from_config(config): # Use transformers.models.gpt_oss.modeling_gpt_oss if len(model_type_list) != 0: model_type = model_type_list[0].group(1) + model_types = [model_type] elif hasattr(config, "auto_mapping"): # Use GptOssForCausalLM model_type = config.auto_mapping.get("base_model_class", None) @@ -1581,14 +1584,34 @@ def extract_model_type_from_config(config): # Last resort use model name unsloth/gpt-oss-20b-unsloth-bnb-4bit model_type = config.base_model_name_or_path model_type = os.path.split(model_type)[-1] + model_types = [model_type] else: - - if model_type is None: + from collections.abc import Mapping, Sequence + def find_values(data, target_key): + stack = [data] + while stack: + obj = stack.pop() + if isinstance(obj, Mapping): + # Emit values for matches + if target_key in obj: + yield obj[target_key] + # Keep walking into nested values + stack.extend(obj.values()) + elif isinstance(obj, Sequence) and not isinstance(obj, (str, bytes, bytearray)): + # Walk sequences (lists/tuples/sets), but not strings/bytes + stack.extend(obj) + model_types = list(find_values(getattr(config, "to_dict", lambda *args, **kwargs: {})(), "model_type")) + pass + if model_types is None: raise TypeError(f"Unsloth: Cannot determine model type for config file: {str(config)}") - # Standardize model_type - model_type = model_type.lower() - model_type = model_type.replace("_", "-") - model_type = model_type.replace("/", "-") - return model_type + final_model_types = [] + for model_type in model_types: + model_type = model_type.lower() + model_type = model_type.replace("_", "") + model_type = model_type.replace("-", "") + model_type = model_type.replace("/", "") + model_type = model_type.replace(".", "") + final_model_types.append(model_type) + return tuple(sorted(final_model_types)) pass diff --git a/unsloth/models/loader.py b/unsloth/models/loader.py index 9c26c8834e..6cefe33aaf 100644 --- a/unsloth/models/loader.py +++ b/unsloth/models/loader.py @@ -84,7 +84,8 @@ global FORCE_FLOAT32 FORCE_FLOAT32 = [ "gemma3", - "gpt_oss", + "gemma3n", + "gptoss", ] class FastLanguageModel(FastLlamaModel): @@ -178,6 +179,8 @@ def from_pretrained( autoconfig_error = None peft_error = None + model_config = None + peft_config = None try: model_config = AutoConfig.from_pretrained( model_name, @@ -201,8 +204,12 @@ def from_pretrained( peft_error = str(error) is_peft = False pass - - # Both config.json and adapter_config.json should not exist! + model_types = extract_model_type_from_config(model_config or peft_config) + if len(model_types) == 1: + model_type = model_types[0] + else: + # Leave as tuple if more than one arch + model_type = model_types # Old transformers versions check both_exist = (is_model and is_peft) and not SUPPORTS_LLAMA32 @@ -267,8 +274,6 @@ def from_pretrained( if not was_disabled: enable_progress_bars() - model_type = model_config.model_type - if model_type == "llama": scaling_type = None if getattr(model_config, "rope_scaling", None) is not None: @@ -494,10 +499,11 @@ def from_pretrained( from transformers import AutoModelForVision2Seq pass +# Must be alphabetically sorted for each entry DISABLE_COMPILE_MODEL_NAMES = [ - "aya-vision", + "ayavision", "modernbert", - "granite-vision", + "granite,llavanext,siglipvisionmodel", # Granite-vision 3 ] @@ -574,20 +580,55 @@ def from_pretrained( if not use_exact_model_name: model_name = get_model_name(model_name, load_in_4bit) + # First check if it's a normal model via AutoConfig + from huggingface_hub.utils import disable_progress_bars, enable_progress_bars, are_progress_bars_disabled + was_disabled = are_progress_bars_disabled() + disable_progress_bars() + + autoconfig_error = None + peft_error = None + model_config = None + peft_config = None + try: + model_config = AutoConfig.from_pretrained( + model_name, + token = token, + revision = revision, + trust_remote_code = trust_remote_code, + ) + is_model = True + except Exception as error: + autoconfig_error = str(error) + is_model = False + try: + peft_config = PeftConfig.from_pretrained( + model_name, + token = token, + revision = revision, + trust_remote_code = trust_remote_code, + ) + is_peft = True + except Exception as error: + peft_error = str(error) + is_peft = False + pass + model_types = extract_model_type_from_config(model_config or peft_config) + model_types_all = ",".join(model_types) + # Check versions lowered_model_name = model_name.lower() os.environ["UNSLOTH_MODEL_NAME"] = lowered_model_name LATEST = '\nPlease use transformers via `pip install --no-deps git+https://github.com/huggingface/transformers.git`' NIGHTLY = '\nPlease use nightly transformers via pip install --upgrade "transformers>=4.49.0"`' # Pixtral - if "pixtral" in lowered_model_name and transformers_version < Version("4.49.0"): + if "pixtral" in model_types_all and transformers_version < Version("4.49.0"): raise RuntimeError("Unsloth: Pixtral only works on transformers >= 4.49.0." + LATEST) # Qwen 2.5 - elif "qwen2.5" in lowered_model_name and transformers_version < Version("4.49.0"): + elif "qwen25" in model_types_all and transformers_version < Version("4.49.0"): raise RuntimeError("Unsloth: Qwen 2.5 only works on transformers >= 4.49.0." + LATEST) # Gemma 3 - elif "gemma-3" in lowered_model_name: - if "gemma-3n" in lowered_model_name: + elif "gemma3" in model_types_all: + if "gemma3n" in model_types_all: if transformers_version < Version("4.53.0"): raise RuntimeError("Unsloth: Gemma 3N only works on transformers >= 4.53.0" + LATEST) os.environ["UNSLOTH_DISABLE_STATIC_GENERATION"] = "1" @@ -605,10 +646,10 @@ def from_pretrained( # common in both gemma-3 and gemma-3n os.environ["UNSLOTH_HIGH_PRECISION_LAYERNORM"] = "1" # Cohere - elif "c4ai-command-a-03-2025" in lowered_model_name and transformers_version < Version("4.50.0.dev0"): + elif "cohere2" in model_types_all and transformers_version < Version("4.50.0.dev0"): raise RuntimeError("Unsloth: Cohere's Command model only works on transformers >= 4.50.0." + NIGHTLY) # Sesame - elif "csm-1b" in lowered_model_name: + elif "csm" in model_types_all: os.environ["UNSLOTH_COMPILE_DISABLE"] = "1" # Inference is too slow os.environ["UNSLOTH_DISABLE_STATIC_GENERATION"] = "1" # Sesame fails os.environ["UNSLOTH_FORCE_CUSTOM_DTYPE"] = \ @@ -616,14 +657,14 @@ def from_pretrained( "if name.endswith(('_proj', 'fc1', 'fc2', 'codebook', 'head')): module.to(torch.float16)"\ ";" # Granite 4 - elif 'granite-4' in lowered_model_name: + elif 'granitemoehybrid' in model_types_all: # Granite-4 rms norms are stored as 16 bit, but we upcast os.environ["UNSLOTH_HIGH_PRECISION_LAYERNORM"] = "1" os.environ["UNSLOTH_DISABLE_STATIC_GENERATION"] = "1" # Olmo 2 - elif "olmo-2" in lowered_model_name and transformers_version < Version("4.50.0.dev0"): + elif "olmo2" in model_types_all and transformers_version < Version("4.50.0.dev0"): raise RuntimeError("Unsloth: OLMo-2 only works on transformers >= 4.50.0." + NIGHTLY) - elif "falcon-h1" in lowered_model_name: + elif "falconh1" in model_types_all: # Falcon must use float32 Triton ie TRITON_F32_DEFAULT = 'ieee' # since Mamba kernels error out on using lower precision os.environ["UNSLOTH_FORCE_CUSTOM_DTYPE"] = \ @@ -631,7 +672,7 @@ def from_pretrained( "if name.endswith(('q_proj', 'k_proj', 'v_proj', 'o_proj', 'gate_proj', 'up_proj', 'down_proj', 'head')): module.to(torch.float16)"\ ";"\ "os.environ['TRITON_F32_DEFAULT'] = 'ieee'" - elif "gpt-oss" in lowered_model_name: + elif "gptoss" in model_types_all: os.environ["UNSLOTH_DISABLE_STATIC_GENERATION"] = "1" if not load_in_4bit: # Only upcast MoE biases for MXFP4, not BnB @@ -681,39 +722,6 @@ def from_pretrained( model_name = snapshot_download(model_name) pass - # First check if it's a normal model via AutoConfig - from huggingface_hub.utils import disable_progress_bars, enable_progress_bars, are_progress_bars_disabled - was_disabled = are_progress_bars_disabled() - disable_progress_bars() - - autoconfig_error = None - peft_error = None - try: - model_config = AutoConfig.from_pretrained( - model_name, - token = token, - revision = revision, - trust_remote_code = trust_remote_code, - ) - is_model = True - except Exception as error: - autoconfig_error = str(error) - is_model = False - try: - peft_config = PeftConfig.from_pretrained( - model_name, - token = token, - revision = revision, - trust_remote_code = trust_remote_code, - ) - is_peft = True - except Exception as error: - peft_error = str(error) - is_peft = False - pass - - # Both config.json and adapter_config.json should not exist! - # Old transformers versions check both_exist = (is_model and is_peft) and not SUPPORTS_LLAMA32 @@ -799,8 +807,8 @@ def from_pretrained( if model_type_arch != "siglip": break global FORCE_FLOAT32 for disable_name in FORCE_FLOAT32: - if (disable_name.lower() == model_type_arch.lower().replace("-", "_") or \ - disable_name.lower() in model_name.lower()) and \ + if (disable_name.lower() == model_type_arch.lower().replace("-", "").replace("_", "") or \ + disable_name.lower() in model_types_all) and \ ((dtype == torch.float16) or not SUPPORTS_BFLOAT16): os.environ["UNSLOTH_FORCE_FLOAT32"] = "1" dtype = torch.bfloat16 # Change to bfloat16 loading @@ -846,7 +854,7 @@ def from_pretrained( ) pass # Fix SDPA - if "gemma-3n" in lowered_model_name: + if "gemma3n" in model_types_all: supports_sdpa = False pass From fc5d91de3b2200e6a4a32e865c5f18272271de5a Mon Sep 17 00:00:00 2001 From: Daniel Han Date: Tue, 9 Sep 2025 22:02:56 -0700 Subject: [PATCH 114/154] Update loader.py --- unsloth/models/loader.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/unsloth/models/loader.py b/unsloth/models/loader.py index 6cefe33aaf..44a74601d9 100644 --- a/unsloth/models/loader.py +++ b/unsloth/models/loader.py @@ -205,6 +205,7 @@ def from_pretrained( is_peft = False pass model_types = extract_model_type_from_config(model_config or peft_config) + print("model_types", model_types) if len(model_types) == 1: model_type = model_types[0] else: @@ -614,6 +615,7 @@ def from_pretrained( pass model_types = extract_model_type_from_config(model_config or peft_config) model_types_all = ",".join(model_types) + print("model_types", model_types) # Check versions lowered_model_name = model_name.lower() From 702a9ead13538d5a930c9a2f644fb92671dd35f2 Mon Sep 17 00:00:00 2001 From: Daniel Han Date: Tue, 9 Sep 2025 22:11:08 -0700 Subject: [PATCH 115/154] get_transformers_model_type --- unsloth/models/_utils.py | 54 ---------------------------------------- unsloth/models/loader.py | 26 ++++++------------- 2 files changed, 8 insertions(+), 72 deletions(-) diff --git a/unsloth/models/_utils.py b/unsloth/models/_utils.py index f961a49de5..56b98489f6 100644 --- a/unsloth/models/_utils.py +++ b/unsloth/models/_utils.py @@ -18,7 +18,6 @@ "SUPPORTS_BFLOAT16", "is_bfloat16_supported", "is_vLLM_available", - "extract_model_type_from_config", "prepare_model_for_kbit_training", "xformers", @@ -1562,56 +1561,3 @@ def _prepare_model_for_qat(model: torch.nn.Module, qat_scheme: str) -> torch.nn. quantize_(model, QATConfig(base_config, step="prepare"), filter_fn=filter_fn) return model pass - - -def extract_model_type_from_config(config): - """ Gets model_type from config file - can be PEFT or normal HF """ - if config is None: - raise TypeError(f"Unsloth: Cannot determine model type for config file: {str(config)}") - model_types = None - from peft import PeftConfig - if issubclass(type(config), PeftConfig): - model_type_list = re.finditer(r"transformers\.models\.([^\.]{2,})\.modeling_\1", str(config)) - model_type_list = list(model_type_list) - # Use transformers.models.gpt_oss.modeling_gpt_oss - if len(model_type_list) != 0: - model_type = model_type_list[0].group(1) - model_types = [model_type] - elif hasattr(config, "auto_mapping"): - # Use GptOssForCausalLM - model_type = config.auto_mapping.get("base_model_class", None) - if model_type is None: - # Last resort use model name unsloth/gpt-oss-20b-unsloth-bnb-4bit - model_type = config.base_model_name_or_path - model_type = os.path.split(model_type)[-1] - model_types = [model_type] - else: - from collections.abc import Mapping, Sequence - def find_values(data, target_key): - stack = [data] - while stack: - obj = stack.pop() - if isinstance(obj, Mapping): - # Emit values for matches - if target_key in obj: - yield obj[target_key] - # Keep walking into nested values - stack.extend(obj.values()) - elif isinstance(obj, Sequence) and not isinstance(obj, (str, bytes, bytearray)): - # Walk sequences (lists/tuples/sets), but not strings/bytes - stack.extend(obj) - model_types = list(find_values(getattr(config, "to_dict", lambda *args, **kwargs: {})(), "model_type")) - pass - if model_types is None: - raise TypeError(f"Unsloth: Cannot determine model type for config file: {str(config)}") - # Standardize model_type - final_model_types = [] - for model_type in model_types: - model_type = model_type.lower() - model_type = model_type.replace("_", "") - model_type = model_type.replace("-", "") - model_type = model_type.replace("/", "") - model_type = model_type.replace(".", "") - final_model_types.append(model_type) - return tuple(sorted(final_model_types)) -pass diff --git a/unsloth/models/loader.py b/unsloth/models/loader.py index 44a74601d9..7e8a32caa7 100644 --- a/unsloth/models/loader.py +++ b/unsloth/models/loader.py @@ -20,7 +20,6 @@ HAS_FLASH_ATTENTION_SOFTCAPPING, USE_MODELSCOPE, get_transformers_model_type, - extract_model_type_from_config, ) from .granite import FastGraniteModel from .llama import FastLlamaModel, logger @@ -204,8 +203,7 @@ def from_pretrained( peft_error = str(error) is_peft = False pass - model_types = extract_model_type_from_config(model_config or peft_config) - print("model_types", model_types) + model_types = get_transformers_model_type(model_config or peft_config) if len(model_types) == 1: model_type = model_types[0] else: @@ -581,6 +579,12 @@ def from_pretrained( if not use_exact_model_name: model_name = get_model_name(model_name, load_in_4bit) + # Check modelscope + if USE_MODELSCOPE and not os.path.exists(model_name): + from modelscope import snapshot_download + model_name = snapshot_download(model_name) + pass + # First check if it's a normal model via AutoConfig from huggingface_hub.utils import disable_progress_bars, enable_progress_bars, are_progress_bars_disabled was_disabled = are_progress_bars_disabled() @@ -613,9 +617,8 @@ def from_pretrained( peft_error = str(error) is_peft = False pass - model_types = extract_model_type_from_config(model_config or peft_config) + model_types = get_transformers_model_type(model_config or peft_config) model_types_all = ",".join(model_types) - print("model_types", model_types) # Check versions lowered_model_name = model_name.lower() @@ -719,11 +722,6 @@ def from_pretrained( os.environ["UNSLOTH_DISABLE_STATIC_GENERATION"] = "1" pass - if USE_MODELSCOPE and not os.path.exists(model_name): - from modelscope import snapshot_download - model_name = snapshot_download(model_name) - pass - # Old transformers versions check both_exist = (is_model and is_peft) and not SUPPORTS_LLAMA32 @@ -793,15 +791,7 @@ def from_pretrained( else: redirector = contextlib.redirect_stdout(open(os.devnull, "w")) - # Get model types like Gemma3 etc - model_types = get_transformers_model_type( - model_name = model_name, - token = token, - revision = revision, - trust_remote_code = trust_remote_code, - ) model_types = ["siglip"] + model_types - # Set forced float32 env flag os.environ["UNSLOTH_FORCE_FLOAT32"] = "0" do_forced_float32 = False From 8ece4a6f915e27f536202017132d031094a518ac Mon Sep 17 00:00:00 2001 From: Daniel Han Date: Tue, 9 Sep 2025 22:14:10 -0700 Subject: [PATCH 116/154] Update loader.py --- unsloth/models/loader.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/unsloth/models/loader.py b/unsloth/models/loader.py index 7e8a32caa7..43c14050c2 100644 --- a/unsloth/models/loader.py +++ b/unsloth/models/loader.py @@ -204,6 +204,7 @@ def from_pretrained( is_peft = False pass model_types = get_transformers_model_type(model_config or peft_config) + print("model_types", model_types) if len(model_types) == 1: model_type = model_types[0] else: @@ -619,6 +620,7 @@ def from_pretrained( pass model_types = get_transformers_model_type(model_config or peft_config) model_types_all = ",".join(model_types) + print("model_types", model_types) # Check versions lowered_model_name = model_name.lower() From f3ac0e3b6d382dd432af4a49c919e4d8a2700480 Mon Sep 17 00:00:00 2001 From: Daniel Han Date: Tue, 9 Sep 2025 22:18:59 -0700 Subject: [PATCH 117/154] Update loader.py --- unsloth/models/loader.py | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) diff --git a/unsloth/models/loader.py b/unsloth/models/loader.py index 43c14050c2..27fb3afe41 100644 --- a/unsloth/models/loader.py +++ b/unsloth/models/loader.py @@ -84,7 +84,7 @@ FORCE_FLOAT32 = [ "gemma3", "gemma3n", - "gptoss", + "gpt_oss", ] class FastLanguageModel(FastLlamaModel): @@ -204,7 +204,6 @@ def from_pretrained( is_peft = False pass model_types = get_transformers_model_type(model_config or peft_config) - print("model_types", model_types) if len(model_types) == 1: model_type = model_types[0] else: @@ -501,9 +500,9 @@ def from_pretrained( # Must be alphabetically sorted for each entry DISABLE_COMPILE_MODEL_NAMES = [ - "ayavision", + "aya_vision", "modernbert", - "granite,llavanext,siglipvisionmodel", # Granite-vision 3 + "granite,llava_next", # Granite-vision 3 ] @@ -620,7 +619,6 @@ def from_pretrained( pass model_types = get_transformers_model_type(model_config or peft_config) model_types_all = ",".join(model_types) - print("model_types", model_types) # Check versions lowered_model_name = model_name.lower() @@ -631,7 +629,7 @@ def from_pretrained( if "pixtral" in model_types_all and transformers_version < Version("4.49.0"): raise RuntimeError("Unsloth: Pixtral only works on transformers >= 4.49.0." + LATEST) # Qwen 2.5 - elif "qwen25" in model_types_all and transformers_version < Version("4.49.0"): + elif "qwen2_5" in model_types_all and transformers_version < Version("4.49.0"): raise RuntimeError("Unsloth: Qwen 2.5 only works on transformers >= 4.49.0." + LATEST) # Gemma 3 elif "gemma3" in model_types_all: @@ -671,7 +669,7 @@ def from_pretrained( # Olmo 2 elif "olmo2" in model_types_all and transformers_version < Version("4.50.0.dev0"): raise RuntimeError("Unsloth: OLMo-2 only works on transformers >= 4.50.0." + NIGHTLY) - elif "falconh1" in model_types_all: + elif "falcon_h1" in model_types_all: # Falcon must use float32 Triton ie TRITON_F32_DEFAULT = 'ieee' # since Mamba kernels error out on using lower precision os.environ["UNSLOTH_FORCE_CUSTOM_DTYPE"] = \ @@ -679,7 +677,7 @@ def from_pretrained( "if name.endswith(('q_proj', 'k_proj', 'v_proj', 'o_proj', 'gate_proj', 'up_proj', 'down_proj', 'head')): module.to(torch.float16)"\ ";"\ "os.environ['TRITON_F32_DEFAULT'] = 'ieee'" - elif "gptoss" in model_types_all: + elif "gpt_oss" in model_types_all: os.environ["UNSLOTH_DISABLE_STATIC_GENERATION"] = "1" if not load_in_4bit: # Only upcast MoE biases for MXFP4, not BnB From d2b0d4193a6e32cf370f2008d8ad05011a6ad0a6 Mon Sep 17 00:00:00 2001 From: Daniel Han Date: Tue, 9 Sep 2025 22:22:15 -0700 Subject: [PATCH 118/154] Update loader.py --- unsloth/models/loader.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/unsloth/models/loader.py b/unsloth/models/loader.py index 27fb3afe41..de2f32f9af 100644 --- a/unsloth/models/loader.py +++ b/unsloth/models/loader.py @@ -809,7 +809,7 @@ def from_pretrained( # Patch gradient checkpointing if use_gradient_checkpointing == "unsloth": patch_unsloth_smart_gradient_checkpointing(dtype = dtype) - + print(model_types) with redirector: patch_loss_functions(torch_compile = False) model_types, supports_sdpa = unsloth_compile_transformers( From e5920fe7027e7caf8602fc9a7d602a84ef197bed Mon Sep 17 00:00:00 2001 From: Daniel Han Date: Wed, 10 Sep 2025 01:21:49 -0700 Subject: [PATCH 119/154] Update rl.py --- unsloth/models/rl.py | 42 ++++++++++++++++++++++-------------------- 1 file changed, 22 insertions(+), 20 deletions(-) diff --git a/unsloth/models/rl.py b/unsloth/models/rl.py index f342a4d86b..14b75f6746 100644 --- a/unsloth/models/rl.py +++ b/unsloth/models/rl.py @@ -44,6 +44,8 @@ } from trl import __version__ as trl_version +from unsloth_zoo.utils import Version +trl_version = Version(trl_version) def vLLMSamplingParams(**kwargs): from vllm import SamplingParams @@ -804,7 +806,7 @@ def patch_functions(RLTrainer, trainer_file, RLTrainer_name, all_imports, import " " * 12 + "if (getattr(args, 'use_vllm', False) == False):\n" + \ " " * 16 + "args.use_vllm = True\n" - if "grpo" in trainer_file and trl_version >= "0.18": + if "grpo" in trainer_file and trl_version >= Version("0.18.0"): # If model has vllm_engine, then use vllm in colocate mode. Donot wait for server vllm_setter += \ " " * 12 + "args.vllm_mode='colocate'\n" @@ -850,26 +852,27 @@ def patch_functions(RLTrainer, trainer_file, RLTrainer_name, all_imports, import sampling_params # Add spaces # count the indentation of last line of sampling_params. - last_line = sampling_params.split("\n")[-1] - last_prev_line = sampling_params.split("\n")[-2] - last_prev_indentation = len(last_prev_line) - len(last_prev_line.lstrip()) - last_indentation = len(last_line) - len(last_line.lstrip()) - - - # Add extra arguments to SamplingParams - extra = "**getattr(getattr(args, 'vllm_sampling_params', vLLMSamplingParams()), '_set_kwargs', {})" - # Backwards replace - to_replace = ",\n" + " "*last_prev_indentation + extra + ",\n" + " "*last_indentation + ")" - sampling_params = to_replace.join(sampling_params.rsplit(")", 1)) - # Strip multiple commas - sampling_params = re.sub(r"[\,][\s]{0,}\,", ",", sampling_params) - - new_vllm_part = \ - f"\n{' '*8}if {args}.use_vllm:\n{sampling_params}"\ - f"\n{' '*8}else:\n" + splitted_sampling_params = sampling_params.split("\n") + if len(splitted_sampling_params) >= 2: + last_line = splitted_sampling_params[-1] + last_prev_line = splitted_sampling_params[-2] + last_prev_indentation = len(last_prev_line) - len(last_prev_line.lstrip()) + last_indentation = len(last_line) - len(last_line.lstrip()) + + # Add extra arguments to SamplingParams + extra = "**getattr(getattr(args, 'vllm_sampling_params', vLLMSamplingParams()), '_set_kwargs', {})" + # Backwards replace + to_replace = ",\n" + " "*last_prev_indentation + extra + ",\n" + " "*last_indentation + ")" + sampling_params = to_replace.join(sampling_params.rsplit(")", 1)) + # Strip multiple commas + sampling_params = re.sub(r"[\,][\s]{0,}\,", ",", sampling_params) + + new_vllm_part = \ + f"\n{' '*8}if {args}.use_vllm:\n{sampling_params}"\ + f"\n{' '*8}else:\n" pass - if trl_version >= "0.18": + if trl_version >= Version("0.18.0"): # Replace LLM init with already existing vLLM engine for colocate mode vllm_llm_init_pattern = r"self\.llm\s*=\s*LLM\(.*?\)*\)\s*?\n(?!,)" vllm_llm_replacement = "self.llm = model.vllm_engine\n" @@ -881,7 +884,6 @@ def patch_functions(RLTrainer, trainer_file, RLTrainer_name, all_imports, import ) init = init.replace(vllm_part, new_vllm_part) - pass # Search for vLLM calling in all child functions From bf0367eb45dc731104968052415184b8e2d080dc Mon Sep 17 00:00:00 2001 From: Daniel Han Date: Wed, 10 Sep 2025 01:24:02 -0700 Subject: [PATCH 120/154] Update pyproject.toml --- pyproject.toml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index c2cb87ce3b..c860a92db6 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -37,7 +37,7 @@ triton = [ ] huggingface = [ - "unsloth_zoo>=2025.9.3", + "unsloth_zoo>=2025.9.4", "packaging", "tyro", "transformers>=4.51.3,!=4.52.0,!=4.52.1,!=4.52.2,!=4.52.3,!=4.53.0,!=4.54.0,!=4.55.0,!=4.55.1", @@ -453,7 +453,7 @@ colab-ampere-torch220 = [ "flash-attn>=2.6.3", ] colab-new = [ - "unsloth_zoo>=2025.9.3", + "unsloth_zoo>=2025.9.4", "packaging", "tyro", "transformers>=4.51.3,!=4.47.0,!=4.52.0,!=4.52.1,!=4.52.2,!=4.52.3,!=4.53.0,!=4.54.0,!=4.55.0,!=4.55.1", From d2c2cc195a99b6b4dbeab7b6f65d1b302b7a9591 Mon Sep 17 00:00:00 2001 From: Daniel Han Date: Wed, 10 Sep 2025 01:26:58 -0700 Subject: [PATCH 121/154] Update loader.py --- unsloth/models/loader.py | 1 - 1 file changed, 1 deletion(-) diff --git a/unsloth/models/loader.py b/unsloth/models/loader.py index de2f32f9af..a57deef000 100644 --- a/unsloth/models/loader.py +++ b/unsloth/models/loader.py @@ -809,7 +809,6 @@ def from_pretrained( # Patch gradient checkpointing if use_gradient_checkpointing == "unsloth": patch_unsloth_smart_gradient_checkpointing(dtype = dtype) - print(model_types) with redirector: patch_loss_functions(torch_compile = False) model_types, supports_sdpa = unsloth_compile_transformers( From 35ca1776b08f81f05e16e268f09cb444f1af1e1b Mon Sep 17 00:00:00 2001 From: Daniel Han Date: Fri, 12 Sep 2025 18:53:46 -0700 Subject: [PATCH 122/154] Update loader.py --- unsloth/models/loader.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/unsloth/models/loader.py b/unsloth/models/loader.py index a57deef000..5ad283d39a 100644 --- a/unsloth/models/loader.py +++ b/unsloth/models/loader.py @@ -204,6 +204,7 @@ def from_pretrained( is_peft = False pass model_types = get_transformers_model_type(model_config or peft_config) + print("207", model_types_all) if len(model_types) == 1: model_type = model_types[0] else: @@ -619,6 +620,7 @@ def from_pretrained( pass model_types = get_transformers_model_type(model_config or peft_config) model_types_all = ",".join(model_types) + print("623", model_types_all) # Check versions lowered_model_name = model_name.lower() From 2eaf868efa817657405b4b67416b91be171b6285 Mon Sep 17 00:00:00 2001 From: Daniel Han Date: Fri, 12 Sep 2025 18:55:47 -0700 Subject: [PATCH 123/154] Update loader.py --- unsloth/models/loader.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/unsloth/models/loader.py b/unsloth/models/loader.py index 5ad283d39a..fd41390889 100644 --- a/unsloth/models/loader.py +++ b/unsloth/models/loader.py @@ -204,7 +204,7 @@ def from_pretrained( is_peft = False pass model_types = get_transformers_model_type(model_config or peft_config) - print("207", model_types_all) + print("207", model_types) if len(model_types) == 1: model_type = model_types[0] else: From 7c892e798fa9ff71f25185ad5e4fb353f3b1a7e6 Mon Sep 17 00:00:00 2001 From: Daniel Han Date: Sat, 13 Sep 2025 02:21:02 -0700 Subject: [PATCH 124/154] Update loader.py --- unsloth/models/loader.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/unsloth/models/loader.py b/unsloth/models/loader.py index fd41390889..ab258f3ed9 100644 --- a/unsloth/models/loader.py +++ b/unsloth/models/loader.py @@ -204,7 +204,6 @@ def from_pretrained( is_peft = False pass model_types = get_transformers_model_type(model_config or peft_config) - print("207", model_types) if len(model_types) == 1: model_type = model_types[0] else: @@ -620,11 +619,11 @@ def from_pretrained( pass model_types = get_transformers_model_type(model_config or peft_config) model_types_all = ",".join(model_types) - print("623", model_types_all) # Check versions lowered_model_name = model_name.lower() - os.environ["UNSLOTH_MODEL_NAME"] = lowered_model_name + if os.environ.get("UNSLOTH_MODEL_NAME", "") == "": + os.environ["UNSLOTH_MODEL_NAME"] = lowered_model_name LATEST = '\nPlease use transformers via `pip install --no-deps git+https://github.com/huggingface/transformers.git`' NIGHTLY = '\nPlease use nightly transformers via pip install --upgrade "transformers>=4.49.0"`' # Pixtral From 72ff24c5ebff286427f46d47a46b82627533ed7f Mon Sep 17 00:00:00 2001 From: Daniel Han Date: Sat, 13 Sep 2025 21:15:04 -0700 Subject: [PATCH 125/154] Versioning --- pyproject.toml | 4 ++-- unsloth/__init__.py | 2 +- unsloth/models/_utils.py | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index d77683c00a..8df936f807 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -37,7 +37,7 @@ triton = [ ] huggingface = [ - "unsloth_zoo>=2025.9.5", + "unsloth_zoo>=2025.9.6", "packaging", "tyro", "transformers>=4.51.3,!=4.52.0,!=4.52.1,!=4.52.2,!=4.52.3,!=4.53.0,!=4.54.0,!=4.55.0,!=4.55.1", @@ -453,7 +453,7 @@ colab-ampere-torch220 = [ "flash-attn>=2.6.3", ] colab-new = [ - "unsloth_zoo>=2025.9.5", + "unsloth_zoo>=2025.9.6", "packaging", "tyro", "transformers>=4.51.3,!=4.47.0,!=4.52.0,!=4.52.1,!=4.52.2,!=4.52.3,!=4.53.0,!=4.54.0,!=4.55.0,!=4.55.1", diff --git a/unsloth/__init__.py b/unsloth/__init__.py index 8255e505a8..1be571b69b 100644 --- a/unsloth/__init__.py +++ b/unsloth/__init__.py @@ -240,7 +240,7 @@ def is_bf16_supported(): return SUPPORTS_BFLOAT16 # Check for unsloth_zoo try: unsloth_zoo_version = importlib_version("unsloth_zoo") - if Version(unsloth_zoo_version) < Version("2025.9.5"): + if Version(unsloth_zoo_version) < Version("2025.9.6"): print( "Unsloth: Please update Unsloth and Unsloth-Zoo to the latest version!\n"\ "Do this via `pip install --upgrade --force-reinstall --no-cache-dir --no-deps unsloth unsloth_zoo`" diff --git a/unsloth/models/_utils.py b/unsloth/models/_utils.py index e3ac56ac83..4cf34aa007 100644 --- a/unsloth/models/_utils.py +++ b/unsloth/models/_utils.py @@ -12,7 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. -__version__ = "2025.9.4" +__version__ = "2025.9.5" __all__ = [ "SUPPORTS_BFLOAT16", From 227842c5b87203c7c4ff1c2fc76763c79f33493c Mon Sep 17 00:00:00 2001 From: Daniel Han Date: Mon, 15 Sep 2025 00:00:15 -0700 Subject: [PATCH 126/154] Update _utils.py --- unsloth/models/_utils.py | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) diff --git a/unsloth/models/_utils.py b/unsloth/models/_utils.py index 4cf34aa007..707d7220b2 100644 --- a/unsloth/models/_utils.py +++ b/unsloth/models/_utils.py @@ -614,6 +614,18 @@ def _is_openai_available(): return False # Get Xformers try: from xformers import __version__ as xformers_version + # [TODO] Xformers does NOT work on RTX 50x (12), B200 (10), Jetson (11) + # See https://github.com/facebookresearch/xformers/issues/1329 + # CUDA error (/workspace/xfrm2/third_party/flash-attention/hopper/flash_fwd_launch_template.h:188) + major_version, minor_version = torch.cuda.get_device_capability() + if ( + f"{major_version}.{minor_version}" in ("10.0", "11.0", "12.0")) and \ + (xformers_version in (Version("0.0.32.post2"),) + ): + raise NotImplementedError( + "Unsloth: Xformers does not work in RTX 50X, Blackwell GPUs as of yet." + ) + pass # Temporarily disable 0.0.27 and higher - inference issues if False: #Version(xformers_version) >= Version("0.0.27"): raise ImportError( @@ -661,7 +673,9 @@ def _is_openai_available(): return False pass import xformers.ops.fmha as xformers xformers_attention = xformers.memory_efficient_attention -except: +except Exception as e: + print("========\nSwitching to SDPA PyTorch native attention which is slightly slower.\n========\n") + print(str(e)) xformers = None xformers_attention = None xformers_version = None From 505ae67fe77b77c04faa7cfb3284fd25441b5ade Mon Sep 17 00:00:00 2001 From: Daniel Han Date: Mon, 15 Sep 2025 00:03:50 -0700 Subject: [PATCH 127/154] Update _utils.py --- unsloth/models/_utils.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/unsloth/models/_utils.py b/unsloth/models/_utils.py index 707d7220b2..3878367650 100644 --- a/unsloth/models/_utils.py +++ b/unsloth/models/_utils.py @@ -84,7 +84,7 @@ from unsloth_zoo.utils import Version from importlib.metadata import version as importlib_version from unsloth import DEVICE_TYPE, DEVICE_COUNT - +from unsloth_zoo.log import logger from unsloth_zoo.tokenizer_utils import ( patch_tokenizer as _patch_tokenizer, ) @@ -608,8 +608,6 @@ def _is_openai_available(): return False elif DEVICE_TYPE == "xpu": SUPPORTS_BFLOAT16 = True -from transformers.models.llama.modeling_llama import logger - # ============================================= # Get Xformers try: From 80465dcabe0bd75dc8b43fddf3d8d672608fd087 Mon Sep 17 00:00:00 2001 From: Daniel Han Date: Mon, 15 Sep 2025 00:06:36 -0700 Subject: [PATCH 128/154] Update _utils.py --- unsloth/models/_utils.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/unsloth/models/_utils.py b/unsloth/models/_utils.py index 3878367650..2abc6b269b 100644 --- a/unsloth/models/_utils.py +++ b/unsloth/models/_utils.py @@ -617,13 +617,12 @@ def _is_openai_available(): return False # CUDA error (/workspace/xfrm2/third_party/flash-attention/hopper/flash_fwd_launch_template.h:188) major_version, minor_version = torch.cuda.get_device_capability() if ( - f"{major_version}.{minor_version}" in ("10.0", "11.0", "12.0")) and \ - (xformers_version in (Version("0.0.32.post2"),) + (f"{major_version}.{minor_version}" in ("10.0", "11.0", "12.0")) and \ + (Version(xformers_version) in (Version("0.0.32.post2"),)) ): raise NotImplementedError( "Unsloth: Xformers does not work in RTX 50X, Blackwell GPUs as of yet." ) - pass # Temporarily disable 0.0.27 and higher - inference issues if False: #Version(xformers_version) >= Version("0.0.27"): raise ImportError( From 4150e081ada733352975234f5a42f97a696a53c3 Mon Sep 17 00:00:00 2001 From: Daniel Han Date: Mon, 15 Sep 2025 01:21:43 -0700 Subject: [PATCH 129/154] Update _utils.py --- unsloth/models/_utils.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/unsloth/models/_utils.py b/unsloth/models/_utils.py index 2abc6b269b..a559d34ca4 100644 --- a/unsloth/models/_utils.py +++ b/unsloth/models/_utils.py @@ -621,7 +621,11 @@ def _is_openai_available(): return False (Version(xformers_version) in (Version("0.0.32.post2"),)) ): raise NotImplementedError( - "Unsloth: Xformers does not work in RTX 50X, Blackwell GPUs as of yet." + "Unsloth: Xformers does not work in RTX 50X, Blackwell GPUs as of yet. Please build from source via\n"\ + "```\n"\ + "pip install ninja\n"\ + "pip install -v --no-build-isolation -U git+https://github.com/facebookresearch/xformers.git@main#egg=xformers\n"\ + "```\n" ) # Temporarily disable 0.0.27 and higher - inference issues if False: #Version(xformers_version) >= Version("0.0.27"): From 032c2c840067870adbbba78ad3088ccd5e2ff849 Mon Sep 17 00:00:00 2001 From: Daniel Han Date: Mon, 15 Sep 2025 22:52:32 -0700 Subject: [PATCH 130/154] Update vision.py --- unsloth/models/vision.py | 23 ++++++++--------------- 1 file changed, 8 insertions(+), 15 deletions(-) diff --git a/unsloth/models/vision.py b/unsloth/models/vision.py index 1451ed92cd..2c77169cb9 100644 --- a/unsloth/models/vision.py +++ b/unsloth/models/vision.py @@ -636,24 +636,17 @@ def get_peft_model( torch.xpu.empty_cache() pass max_seq_length = model.max_seq_length - # if we pass loftq_config = None we will get an error + # If we pass loftq_config = None we will get an error loftq_config = validate_loftq_config(loftq_config, lora_dropout, bias, init_lora_weights, model) - lora_config_dict = { - "r" : r, - "lora_alpha" : lora_alpha, - "target_modules" : target_modules, - "target_parameters" : kwargs.get("target_parameters", None), - "lora_dropout" : lora_dropout, - "bias" : bias, - "task_type" : task_type, - "modules_to_save" : modules_to_save, - "use_rslora" : use_rslora, - "init_lora_weights" : init_lora_weights, - "loftq_config" : loftq_config, - } + + # Get only allowed parameters for LoraConfig + local_variables = { **locals(), **kwargs, } + del local_variables["kwargs"] + allowed_parameters = inspect.signature(LoraConfig).parameters.keys() lora_config = LoraConfig( - **{k:v for k,v in lora_config_dict.items() if k in LoraConfig.__doc__}, + **{ k : v for k, v in local_variables.items() if k in allowed_parameters }, ) + print(lora_config) model = prepare_model_for_kbit_training( model, use_gradient_checkpointing = use_gradient_checkpointing, From b105aae096e46646bf9ea5b7e0f541cad981f066 Mon Sep 17 00:00:00 2001 From: Daniel Han Date: Mon, 15 Sep 2025 23:00:14 -0700 Subject: [PATCH 131/154] Update vision.py --- unsloth/models/vision.py | 1 - 1 file changed, 1 deletion(-) diff --git a/unsloth/models/vision.py b/unsloth/models/vision.py index 2c77169cb9..f8c0f866f9 100644 --- a/unsloth/models/vision.py +++ b/unsloth/models/vision.py @@ -646,7 +646,6 @@ def get_peft_model( lora_config = LoraConfig( **{ k : v for k, v in local_variables.items() if k in allowed_parameters }, ) - print(lora_config) model = prepare_model_for_kbit_training( model, use_gradient_checkpointing = use_gradient_checkpointing, From 400df38fb04aaec151c1d5b1e0d2a1ac23ceca6f Mon Sep 17 00:00:00 2001 From: Daniel Han Date: Tue, 16 Sep 2025 03:00:39 -0700 Subject: [PATCH 132/154] Fix DataParallel --- unsloth/models/llama.py | 7 +++++-- unsloth/models/rl.py | 6 ++++++ 2 files changed, 11 insertions(+), 2 deletions(-) diff --git a/unsloth/models/llama.py b/unsloth/models/llama.py index f7a53d05fd..e04ffd029e 100644 --- a/unsloth/models/llama.py +++ b/unsloth/models/llama.py @@ -1200,7 +1200,8 @@ def _CausalLM_fast_forward( if not RETURN_LOGITS and labels is not None: - n_items = kwargs.get("num_items_in_batch", None) or kwargs.get("n_items", None) + n_items = kwargs.get("num_items_in_batch", None) + if n_items is None: n_items = kwargs.get("n_items", None) if self.config.model_type == "falcon_h1": hidden_states = hidden_states * self.config.lm_head_multiplier @@ -1264,12 +1265,14 @@ def _CausalLM_fast_forward( shift_labels[..., :-1] = labels[..., 1:] shift_labels[..., -1] = -100 # shift_labels = torch.hstack((labels[..., 1:], self.extra_ignored_labels[:labels.shape[0]])) + n_items = kwargs.get("num_items_in_batch", None) + if n_items is None: n_items = kwargs.get("n_items", None) loss = fast_cross_entropy_loss( logits = shift_logits, labels = shift_labels, logit_softcapping = logit_softcapping, logit_scaling = logit_scaling, - n_items = kwargs.get("num_items_in_batch", None) or kwargs.get("n_items", None), + n_items = n_items, ) else: if logit_scaling != 0: diff --git a/unsloth/models/rl.py b/unsloth/models/rl.py index 53f5eee66c..9e940c763b 100644 --- a/unsloth/models/rl.py +++ b/unsloth/models/rl.py @@ -110,6 +110,7 @@ def generate_with_clone(*args, **kwargs): from contextlib import nullcontext from torch.nn import functional as F from transformers import DataCollatorForSeq2Seq, DataCollatorForLanguageModeling as TransformersDataCollatorForLanguageModeling +from transformers.training_args import ParallelMode torch_compile_options = {{ "epilogue_fusion" : True, @@ -160,6 +161,11 @@ def __init__({RLTrainer_arguments}, ): if args is None: args = Unsloth{RLConfig_name}() {RLTrainer_extra_args} + # [TODO] Fix up DataParallel multiplying batch sizes + # [TODO] DDP works, but DP seems to not work? [TODO] + if getattr(args, "parallel_mode", None) == ParallelMode.NOT_DISTRIBUTED and args.n_gpu > 1: + if getattr(args, "_n_gpu", 1) != 1: + args._n_gpu = 1 super().__init__({RLTrainer_call_args}{RLTrainer_kwargs}) {RLTrainer_post} pass From 809a8b3b206db30c676852af07270db8c44b7319 Mon Sep 17 00:00:00 2001 From: Daniel Han Date: Tue, 16 Sep 2025 03:02:52 -0700 Subject: [PATCH 133/154] Update _utils.py --- unsloth/models/_utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/unsloth/models/_utils.py b/unsloth/models/_utils.py index a559d34ca4..194d18771c 100644 --- a/unsloth/models/_utils.py +++ b/unsloth/models/_utils.py @@ -12,7 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. -__version__ = "2025.9.5" +__version__ = "2025.9.6" __all__ = [ "SUPPORTS_BFLOAT16", From 3dcc0911eb5e5ae360456e281f3e9ca99c5f95b8 Mon Sep 17 00:00:00 2001 From: Daniel Han Date: Wed, 17 Sep 2025 02:13:07 -0700 Subject: [PATCH 134/154] Update rl.py --- unsloth/models/rl.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/unsloth/models/rl.py b/unsloth/models/rl.py index 3e2fcf22be..6f1f000e68 100644 --- a/unsloth/models/rl.py +++ b/unsloth/models/rl.py @@ -271,14 +271,17 @@ def _patch_trl_rl_trainers(trainer_file = "grpo_trainer"): "if not force_float32 and (float16 and use_bf16): raise TypeError('Unsloth: Model is in float16 precision but you want to use bfloat16 precision. Set fp16 to `True` and bf16 to `False`')\n"\ "if not force_float32 and (not float16 and use_fp16): raise TypeError('Unsloth: Model is in bfloat16 precision but you want to use float16 precision. Set fp16 to `False` and bf16 to `True`')\n"\ "if force_float32:\n"\ + " # Forced float32 training\n"\ " args.fp16 = False\n"\ " args.bf16 = False\n"\ " os.environ['ACCELERATE_MIXED_PRECISION'] = 'no'\n"\ "elif (not use_bf16 and not use_fp16) and mixed_precision_dtype == 'float32':\n"\ + " # Mixed precision training\n"\ " args.fp16 = float16\n"\ " args.bf16 = not float16\n"\ " os.environ['ACCELERATE_MIXED_PRECISION'] = 'fp16' if float16 else 'bf16'\n" "elif mixed_precision_dtype == 'bfloat16':\n"\ + " # Both False since bfloat16 full finetuning doesn't do any autocasting.\n"\ " args.fp16 = False\n"\ " args.bf16 = False\n"\ " os.environ['ACCELERATE_MIXED_PRECISION'] = 'no'\n" From 28b1d50016921db9ada7bcdcdb67c61b92c9f379 Mon Sep 17 00:00:00 2001 From: Daniel Han Date: Wed, 17 Sep 2025 02:40:22 -0700 Subject: [PATCH 135/154] Update synthetic.py --- unsloth/dataprep/synthetic.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/unsloth/dataprep/synthetic.py b/unsloth/dataprep/synthetic.py index 52c114fab6..60742b7fdc 100644 --- a/unsloth/dataprep/synthetic.py +++ b/unsloth/dataprep/synthetic.py @@ -28,6 +28,7 @@ patch_vllm, delete_vllm, ) +from unsloth_zoo.log import logger import numpy as np from .synthetic_configs import ( @@ -117,6 +118,7 @@ def __init__( else: subprocess_commands += ["--" + flag, which,] pass + logger.info(subprocess_commands) vllm_process = subprocess.Popen( subprocess_commands, stdout = subprocess.PIPE, From de162d3e2a724dd178d24961bd9b989a68b70f2d Mon Sep 17 00:00:00 2001 From: Daniel Han Date: Wed, 17 Sep 2025 02:56:36 -0700 Subject: [PATCH 136/154] Update synthetic.py --- unsloth/dataprep/synthetic.py | 1 + 1 file changed, 1 insertion(+) diff --git a/unsloth/dataprep/synthetic.py b/unsloth/dataprep/synthetic.py index 60742b7fdc..2cca155d6d 100644 --- a/unsloth/dataprep/synthetic.py +++ b/unsloth/dataprep/synthetic.py @@ -77,6 +77,7 @@ def __init__( return_args = True, enable_lora = False, use_bitsandbytes = False, + compilation_config = 3, **kwargs, ) if "dtype" in engine_args: From a507a7d82bb1792986ffaa99c9f10b4de7e6bba3 Mon Sep 17 00:00:00 2001 From: Daniel Han Date: Wed, 17 Sep 2025 03:03:32 -0700 Subject: [PATCH 137/154] Update synthetic.py --- unsloth/dataprep/synthetic.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/unsloth/dataprep/synthetic.py b/unsloth/dataprep/synthetic.py index 2cca155d6d..d52f1df373 100644 --- a/unsloth/dataprep/synthetic.py +++ b/unsloth/dataprep/synthetic.py @@ -99,7 +99,7 @@ def __init__( if "model" in engine_args: del engine_args["model"] if "compilation_config" in engine_args: # Cannot parse in vllm serve - engine_args["compilation_config"] = 3 + engine_args["compilation_config"] = "'" + str(engine_args["compilation_config"]) + "'" subprocess_commands = [ "vllm", "serve", str(model_name), From cda72638c333e653d1ac74df30a69b6abfbf3624 Mon Sep 17 00:00:00 2001 From: Daniel Han Date: Wed, 17 Sep 2025 03:06:04 -0700 Subject: [PATCH 138/154] Update synthetic.py --- unsloth/dataprep/synthetic.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/unsloth/dataprep/synthetic.py b/unsloth/dataprep/synthetic.py index d52f1df373..68dd475e59 100644 --- a/unsloth/dataprep/synthetic.py +++ b/unsloth/dataprep/synthetic.py @@ -99,7 +99,7 @@ def __init__( if "model" in engine_args: del engine_args["model"] if "compilation_config" in engine_args: # Cannot parse in vllm serve - engine_args["compilation_config"] = "'" + str(engine_args["compilation_config"]) + "'" + engine_args["compilation_config"] = '"' + str(engine_args["compilation_config"]) + '"' subprocess_commands = [ "vllm", "serve", str(model_name), From dd8ad929e13235091c0379a03a2f09ac3a5c61a1 Mon Sep 17 00:00:00 2001 From: Daniel Han Date: Wed, 17 Sep 2025 03:07:03 -0700 Subject: [PATCH 139/154] Update synthetic.py --- unsloth/dataprep/synthetic.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/unsloth/dataprep/synthetic.py b/unsloth/dataprep/synthetic.py index 68dd475e59..53d655ce0e 100644 --- a/unsloth/dataprep/synthetic.py +++ b/unsloth/dataprep/synthetic.py @@ -97,15 +97,15 @@ def __init__( engine_args["dtype"] = "auto" if "device" in engine_args: del engine_args["device"] if "model" in engine_args: del engine_args["model"] - if "compilation_config" in engine_args: - # Cannot parse in vllm serve - engine_args["compilation_config"] = '"' + str(engine_args["compilation_config"]) + '"' subprocess_commands = [ "vllm", "serve", str(model_name), ] for key, value in engine_args.items(): flag = key.replace("_", "-") + if key == "compilation_config": + subprocess_commands += ["--" + '"' + str(value) + '"',] + continue which = str(value).replace("torch.", "") if which == "True": # Ignore --enforce-eager True From a725b98363e50b7c80649e83975c1f9017f01eed Mon Sep 17 00:00:00 2001 From: Daniel Han Date: Wed, 17 Sep 2025 03:24:07 -0700 Subject: [PATCH 140/154] Update synthetic.py --- unsloth/dataprep/synthetic.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/unsloth/dataprep/synthetic.py b/unsloth/dataprep/synthetic.py index 53d655ce0e..7c421b33bf 100644 --- a/unsloth/dataprep/synthetic.py +++ b/unsloth/dataprep/synthetic.py @@ -104,7 +104,7 @@ def __init__( for key, value in engine_args.items(): flag = key.replace("_", "-") if key == "compilation_config": - subprocess_commands += ["--" + '"' + str(value) + '"',] + subprocess_commands += ["--" + flag, '"' + str(value) + '"',] continue which = str(value).replace("torch.", "") if which == "True": From 321f1a33b0e243691b8e297ac0170393d51456ff Mon Sep 17 00:00:00 2001 From: Daniel Han Date: Wed, 17 Sep 2025 03:26:57 -0700 Subject: [PATCH 141/154] Update synthetic.py --- unsloth/dataprep/synthetic.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/unsloth/dataprep/synthetic.py b/unsloth/dataprep/synthetic.py index 7c421b33bf..7e27b8261d 100644 --- a/unsloth/dataprep/synthetic.py +++ b/unsloth/dataprep/synthetic.py @@ -104,7 +104,7 @@ def __init__( for key, value in engine_args.items(): flag = key.replace("_", "-") if key == "compilation_config": - subprocess_commands += ["--" + flag, '"' + str(value) + '"',] + subprocess_commands += ["--" + flag, "'" + str(value) + "'",] continue which = str(value).replace("torch.", "") if which == "True": From 357e5019b7341c9b19f62db146950113e4aa58b9 Mon Sep 17 00:00:00 2001 From: Daniel Han Date: Wed, 17 Sep 2025 03:30:11 -0700 Subject: [PATCH 142/154] Update synthetic.py --- unsloth/dataprep/synthetic.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/unsloth/dataprep/synthetic.py b/unsloth/dataprep/synthetic.py index 7e27b8261d..aa5296c58b 100644 --- a/unsloth/dataprep/synthetic.py +++ b/unsloth/dataprep/synthetic.py @@ -104,7 +104,8 @@ def __init__( for key, value in engine_args.items(): flag = key.replace("_", "-") if key == "compilation_config": - subprocess_commands += ["--" + flag, "'" + str(value) + "'",] + quoted_compilation_config = '"' + str(value) + '"' + subprocess_commands += ["--" + flag, "'" + quoted_compilation_config[1:-1] + "'",] continue which = str(value).replace("torch.", "") if which == "True": From 8a03656b958d023c4e2639ef3cf7d6c0616f4efb Mon Sep 17 00:00:00 2001 From: Daniel Han Date: Wed, 17 Sep 2025 03:32:54 -0700 Subject: [PATCH 143/154] Update synthetic.py --- unsloth/dataprep/synthetic.py | 1 + 1 file changed, 1 insertion(+) diff --git a/unsloth/dataprep/synthetic.py b/unsloth/dataprep/synthetic.py index aa5296c58b..eb73a5fb84 100644 --- a/unsloth/dataprep/synthetic.py +++ b/unsloth/dataprep/synthetic.py @@ -121,6 +121,7 @@ def __init__( subprocess_commands += ["--" + flag, which,] pass logger.info(subprocess_commands) + print(subprocess_commands) vllm_process = subprocess.Popen( subprocess_commands, stdout = subprocess.PIPE, From d7832d01baaef9a791c509d69c122c61385425f2 Mon Sep 17 00:00:00 2001 From: Daniel Han Date: Wed, 17 Sep 2025 03:42:00 -0700 Subject: [PATCH 144/154] Update synthetic.py --- unsloth/dataprep/synthetic.py | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/unsloth/dataprep/synthetic.py b/unsloth/dataprep/synthetic.py index eb73a5fb84..70f94e5584 100644 --- a/unsloth/dataprep/synthetic.py +++ b/unsloth/dataprep/synthetic.py @@ -102,10 +102,9 @@ def __init__( "vllm", "serve", str(model_name), ] for key, value in engine_args.items(): - flag = key.replace("_", "-") + flag = key.replace("_", "-") if key == "compilation_config": - quoted_compilation_config = '"' + str(value) + '"' - subprocess_commands += ["--" + flag, "'" + quoted_compilation_config[1:-1] + "'",] + subprocess_commands += ["--" + flag, str(value),] continue which = str(value).replace("torch.", "") if which == "True": @@ -121,7 +120,6 @@ def __init__( subprocess_commands += ["--" + flag, which,] pass logger.info(subprocess_commands) - print(subprocess_commands) vllm_process = subprocess.Popen( subprocess_commands, stdout = subprocess.PIPE, From 84f54348de880229dd67afbb737ea247839a6afa Mon Sep 17 00:00:00 2001 From: Daniel Han Date: Wed, 17 Sep 2025 04:09:59 -0700 Subject: [PATCH 145/154] Update synthetic.py --- unsloth/dataprep/synthetic.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/unsloth/dataprep/synthetic.py b/unsloth/dataprep/synthetic.py index 70f94e5584..b75918237b 100644 --- a/unsloth/dataprep/synthetic.py +++ b/unsloth/dataprep/synthetic.py @@ -104,7 +104,8 @@ def __init__( for key, value in engine_args.items(): flag = key.replace("_", "-") if key == "compilation_config": - subprocess_commands += ["--" + flag, str(value),] + # [TODO] Unsure why subprocess doesn't process json properly + subprocess_commands += ["-O3",] continue which = str(value).replace("torch.", "") if which == "True": From 17b2e98f3df7735166a6c3f8b4ba2689418bc6e3 Mon Sep 17 00:00:00 2001 From: Daniel Han Date: Wed, 17 Sep 2025 04:16:41 -0700 Subject: [PATCH 146/154] Update synthetic.py --- unsloth/dataprep/synthetic.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/unsloth/dataprep/synthetic.py b/unsloth/dataprep/synthetic.py index b75918237b..9651df23e8 100644 --- a/unsloth/dataprep/synthetic.py +++ b/unsloth/dataprep/synthetic.py @@ -105,7 +105,8 @@ def __init__( flag = key.replace("_", "-") if key == "compilation_config": # [TODO] Unsure why subprocess doesn't process json properly - subprocess_commands += ["-O3",] + # Also -O3 breaks on T4! + # subprocess_commands += ["-O3",] continue which = str(value).replace("torch.", "") if which == "True": From 5364138046cdddedc37594ae87f5e51bb0265031 Mon Sep 17 00:00:00 2001 From: Daniel Han Date: Wed, 17 Sep 2025 06:35:44 -0700 Subject: [PATCH 147/154] Update mapper.py --- unsloth/models/mapper.py | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/unsloth/models/mapper.py b/unsloth/models/mapper.py index be269316fe..eb9119b681 100644 --- a/unsloth/models/mapper.py +++ b/unsloth/models/mapper.py @@ -956,6 +956,16 @@ "google/gemma-3-270m", "unsloth/gemma-3-270m-bnb-4bit", ), + "unsloth/Magistral-Small-2507-unsloth-bnb-4bit" : ( + "unsloth/Magistral-Small-2507", + "mistralai/Magistral-Small-2507", + "unsloth/Magistral-Small-2507-bnb-4bit", + ), + "unsloth/Magistral-Small-2509-unsloth-bnb-4bit" : ( + "unsloth/Magistral-Small-2509", + "mistralai/Magistral-Small-2509", + "unsloth/Magistral-Small-2509-bnb-4bit", + ), } INT_TO_FLOAT_MAPPER = {} From 8dbd0084d4097cf3c5eb03027ecdf5ec5bdacc17 Mon Sep 17 00:00:00 2001 From: Daniel Han Date: Wed, 17 Sep 2025 08:21:10 -0700 Subject: [PATCH 148/154] Versioning --- pyproject.toml | 4 ++-- unsloth/models/_utils.py | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 70fc3bdedc..c3915c1cd6 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -37,7 +37,7 @@ triton = [ ] huggingface = [ - "unsloth_zoo>=2025.9.7", + "unsloth_zoo>=2025.9.8", "packaging", "tyro", "transformers>=4.51.3,!=4.52.0,!=4.52.1,!=4.52.2,!=4.52.3,!=4.53.0,!=4.54.0,!=4.55.0,!=4.55.1,<=4.55.4", @@ -453,7 +453,7 @@ colab-ampere-torch220 = [ "flash-attn>=2.6.3", ] colab-new = [ - "unsloth_zoo>=2025.9.7", + "unsloth_zoo>=2025.9.8", "packaging", "tyro", "transformers>=4.51.3,!=4.47.0,!=4.52.0,!=4.52.1,!=4.52.2,!=4.52.3,!=4.53.0,!=4.54.0,!=4.55.0,!=4.55.1,<=4.55.4", diff --git a/unsloth/models/_utils.py b/unsloth/models/_utils.py index 41adc74650..d2ebc29bf2 100644 --- a/unsloth/models/_utils.py +++ b/unsloth/models/_utils.py @@ -12,7 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. -__version__ = "2025.9.6" +__version__ = "2025.9.7" __all__ = [ "SUPPORTS_BFLOAT16", From d7ca79f18ef5b794b3684768708ab7ebb57a4acc Mon Sep 17 00:00:00 2001 From: Daniel Han Date: Wed, 17 Sep 2025 22:01:19 -0700 Subject: [PATCH 149/154] Update loader.py --- unsloth/models/loader.py | 39 ++++++++++++++++++++------------------- 1 file changed, 20 insertions(+), 19 deletions(-) diff --git a/unsloth/models/loader.py b/unsloth/models/loader.py index da40fb57d8..e891340221 100644 --- a/unsloth/models/loader.py +++ b/unsloth/models/loader.py @@ -83,8 +83,8 @@ global FORCE_FLOAT32 FORCE_FLOAT32 = [ - "gemma3,", # Add comma bc gemma3 will match gemma3n - "gemma3n", + "gemma3,", + "gemma3n,", "gpt_oss", ] @@ -627,7 +627,7 @@ def from_pretrained( is_peft = False pass model_types = get_transformers_model_type(model_config or peft_config) - model_types_all = ",".join(model_types) + model_types_all = ",".join(model_types) + "," # Check versions lowered_model_name = model_name.lower() @@ -642,21 +642,22 @@ def from_pretrained( elif "qwen2_5" in model_types_all and transformers_version < Version("4.49.0"): raise RuntimeError("Unsloth: Qwen 2.5 only works on transformers >= 4.49.0." + LATEST) # Gemma 3 - elif "gemma3" in model_types_all: - if "gemma3n" in model_types_all: - if transformers_version < Version("4.53.0"): - raise RuntimeError("Unsloth: Gemma 3N only works on transformers >= 4.53.0" + LATEST) - os.environ["UNSLOTH_DISABLE_STATIC_GENERATION"] = "1" - os.environ["UNSLOTH_FORCE_CUSTOM_DTYPE"] = \ - "float16;torch.float16;torch.float16;"\ - "if name.endswith('norm'): "\ - "module._pre_set_compute_dtype = torch.float32\n"\ - ";"\ - "from unsloth_zoo.temporary_patches.gemma3n import patch_Gemma3nConv_Embed_forwards; patch_Gemma3nConv_Embed_forwards()" - else: - if transformers_version < Version("4.50.0.dev0"): - raise RuntimeError("Unsloth: Gemma 3 only works on transformers >= 4.50.0." + NIGHTLY) - + elif "gemma3," in model_types_all: + if transformers_version < Version("4.50.0.dev0"): + raise RuntimeError("Unsloth: Gemma 3 only works on transformers >= 4.50.0." + NIGHTLY) + # Set norms to float32 since anyways they get upcasted to float32 + # common in both gemma-3 and gemma-3n + os.environ["UNSLOTH_HIGH_PRECISION_LAYERNORM"] = "1" + elif "gemma3n," in model_types_all: + if transformers_version < Version("4.53.0"): + raise RuntimeError("Unsloth: Gemma 3N only works on transformers >= 4.53.0" + LATEST) + os.environ["UNSLOTH_DISABLE_STATIC_GENERATION"] = "1" + os.environ["UNSLOTH_FORCE_CUSTOM_DTYPE"] = \ + "float16;torch.float16;torch.float16;"\ + "if name.endswith('norm'): "\ + "module._pre_set_compute_dtype = torch.float32\n"\ + ";"\ + "from unsloth_zoo.temporary_patches.gemma3n import patch_Gemma3nConv_Embed_forwards; patch_Gemma3nConv_Embed_forwards()" # Set norms to float32 since anyways they get upcasted to float32 # common in both gemma-3 and gemma-3n os.environ["UNSLOTH_HIGH_PRECISION_LAYERNORM"] = "1" @@ -811,7 +812,7 @@ def from_pretrained( for disable_name in FORCE_FLOAT32: # add comma to model_types_all matching in case of exact match for end if (disable_name.lower() == model_type_arch.lower().replace("-", "").replace("_", "") or \ - disable_name.lower() in f'{model_types_all},') and \ + disable_name.lower() in model_types_all) and \ ((dtype == torch.float16) or not SUPPORTS_BFLOAT16): os.environ["UNSLOTH_FORCE_FLOAT32"] = "1" dtype = torch.bfloat16 # Change to bfloat16 loading From bb90785ad3066b4ba926cf1e607f120128c32982 Mon Sep 17 00:00:00 2001 From: Daniel Han Date: Wed, 17 Sep 2025 22:10:29 -0700 Subject: [PATCH 150/154] Update loader.py --- unsloth/models/loader.py | 39 +++++++++++++++++++-------------------- 1 file changed, 19 insertions(+), 20 deletions(-) diff --git a/unsloth/models/loader.py b/unsloth/models/loader.py index e891340221..da40fb57d8 100644 --- a/unsloth/models/loader.py +++ b/unsloth/models/loader.py @@ -83,8 +83,8 @@ global FORCE_FLOAT32 FORCE_FLOAT32 = [ - "gemma3,", - "gemma3n,", + "gemma3,", # Add comma bc gemma3 will match gemma3n + "gemma3n", "gpt_oss", ] @@ -627,7 +627,7 @@ def from_pretrained( is_peft = False pass model_types = get_transformers_model_type(model_config or peft_config) - model_types_all = ",".join(model_types) + "," + model_types_all = ",".join(model_types) # Check versions lowered_model_name = model_name.lower() @@ -642,22 +642,21 @@ def from_pretrained( elif "qwen2_5" in model_types_all and transformers_version < Version("4.49.0"): raise RuntimeError("Unsloth: Qwen 2.5 only works on transformers >= 4.49.0." + LATEST) # Gemma 3 - elif "gemma3," in model_types_all: - if transformers_version < Version("4.50.0.dev0"): - raise RuntimeError("Unsloth: Gemma 3 only works on transformers >= 4.50.0." + NIGHTLY) - # Set norms to float32 since anyways they get upcasted to float32 - # common in both gemma-3 and gemma-3n - os.environ["UNSLOTH_HIGH_PRECISION_LAYERNORM"] = "1" - elif "gemma3n," in model_types_all: - if transformers_version < Version("4.53.0"): - raise RuntimeError("Unsloth: Gemma 3N only works on transformers >= 4.53.0" + LATEST) - os.environ["UNSLOTH_DISABLE_STATIC_GENERATION"] = "1" - os.environ["UNSLOTH_FORCE_CUSTOM_DTYPE"] = \ - "float16;torch.float16;torch.float16;"\ - "if name.endswith('norm'): "\ - "module._pre_set_compute_dtype = torch.float32\n"\ - ";"\ - "from unsloth_zoo.temporary_patches.gemma3n import patch_Gemma3nConv_Embed_forwards; patch_Gemma3nConv_Embed_forwards()" + elif "gemma3" in model_types_all: + if "gemma3n" in model_types_all: + if transformers_version < Version("4.53.0"): + raise RuntimeError("Unsloth: Gemma 3N only works on transformers >= 4.53.0" + LATEST) + os.environ["UNSLOTH_DISABLE_STATIC_GENERATION"] = "1" + os.environ["UNSLOTH_FORCE_CUSTOM_DTYPE"] = \ + "float16;torch.float16;torch.float16;"\ + "if name.endswith('norm'): "\ + "module._pre_set_compute_dtype = torch.float32\n"\ + ";"\ + "from unsloth_zoo.temporary_patches.gemma3n import patch_Gemma3nConv_Embed_forwards; patch_Gemma3nConv_Embed_forwards()" + else: + if transformers_version < Version("4.50.0.dev0"): + raise RuntimeError("Unsloth: Gemma 3 only works on transformers >= 4.50.0." + NIGHTLY) + # Set norms to float32 since anyways they get upcasted to float32 # common in both gemma-3 and gemma-3n os.environ["UNSLOTH_HIGH_PRECISION_LAYERNORM"] = "1" @@ -812,7 +811,7 @@ def from_pretrained( for disable_name in FORCE_FLOAT32: # add comma to model_types_all matching in case of exact match for end if (disable_name.lower() == model_type_arch.lower().replace("-", "").replace("_", "") or \ - disable_name.lower() in model_types_all) and \ + disable_name.lower() in f'{model_types_all},') and \ ((dtype == torch.float16) or not SUPPORTS_BFLOAT16): os.environ["UNSLOTH_FORCE_FLOAT32"] = "1" dtype = torch.bfloat16 # Change to bfloat16 loading From 3289826add711c92dee44f1117fa6a54d6e68b91 Mon Sep 17 00:00:00 2001 From: Daniel Han Date: Thu, 18 Sep 2025 02:22:14 -0700 Subject: [PATCH 151/154] Update rl.py --- unsloth/models/rl.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/unsloth/models/rl.py b/unsloth/models/rl.py index 6f1f000e68..3d5f6d084b 100644 --- a/unsloth/models/rl.py +++ b/unsloth/models/rl.py @@ -259,7 +259,8 @@ def _patch_trl_rl_trainers(trainer_file = "grpo_trainer"): "use_fp16 = getattr(args, 'fp16', False)\n"\ "if type(use_fp16) is not bool: use_fp16 = False\n"\ "force_float32 = False\n"\ - "if os.environ.get('UNSLOTH_FORCE_FLOAT32', '0') == '1':\n"\ + "full_finetuning = os.environ.get('UNSLOTH_ENABLE_FULL_FINETUNING', '0') == '1'\n"\ + "if not full_finetuning and (os.environ.get('UNSLOTH_FORCE_FLOAT32', '0') == '1'):\n"\ " print('Unsloth: Switching to float32 training since model cannot work with float16')\n"\ " force_float32 = True\n"\ "mixed_precision_dtype = os.environ.get('UNSLOTH_MIXED_PRECISION', 'float32')\n"\ From a04211436f8a11aaece59d4662a29ab4c825a0b1 Mon Sep 17 00:00:00 2001 From: Daniel Han Date: Thu, 18 Sep 2025 04:31:28 -0700 Subject: [PATCH 152/154] Versioning --- pyproject.toml | 4 ++-- unsloth/models/_utils.py | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index c3915c1cd6..4f9c308b32 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -37,7 +37,7 @@ triton = [ ] huggingface = [ - "unsloth_zoo>=2025.9.8", + "unsloth_zoo>=2025.9.9", "packaging", "tyro", "transformers>=4.51.3,!=4.52.0,!=4.52.1,!=4.52.2,!=4.52.3,!=4.53.0,!=4.54.0,!=4.55.0,!=4.55.1,<=4.55.4", @@ -453,7 +453,7 @@ colab-ampere-torch220 = [ "flash-attn>=2.6.3", ] colab-new = [ - "unsloth_zoo>=2025.9.8", + "unsloth_zoo>=2025.9.9", "packaging", "tyro", "transformers>=4.51.3,!=4.47.0,!=4.52.0,!=4.52.1,!=4.52.2,!=4.52.3,!=4.53.0,!=4.54.0,!=4.55.0,!=4.55.1,<=4.55.4", diff --git a/unsloth/models/_utils.py b/unsloth/models/_utils.py index 79134005dc..5f41352d97 100644 --- a/unsloth/models/_utils.py +++ b/unsloth/models/_utils.py @@ -12,7 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. -__version__ = "2025.9.7" +__version__ = "2025.9.8" __all__ = [ "SUPPORTS_BFLOAT16", From ffa04dde12b7fa9430566cce8b0309531f7af2ba Mon Sep 17 00:00:00 2001 From: Daniel Han Date: Thu, 18 Sep 2025 15:45:42 -0700 Subject: [PATCH 153/154] Update _utils.py --- unsloth/models/_utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/unsloth/models/_utils.py b/unsloth/models/_utils.py index 5f41352d97..79134005dc 100644 --- a/unsloth/models/_utils.py +++ b/unsloth/models/_utils.py @@ -12,7 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. -__version__ = "2025.9.8" +__version__ = "2025.9.7" __all__ = [ "SUPPORTS_BFLOAT16", From b3654449bdd237e642e5f44c6e96c74e203232f7 Mon Sep 17 00:00:00 2001 From: Daniel Han Date: Thu, 18 Sep 2025 18:57:41 -0700 Subject: [PATCH 154/154] Fix auto_mapping --- unsloth/models/llama.py | 4 +++- unsloth/models/vision.py | 4 +++- 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/unsloth/models/llama.py b/unsloth/models/llama.py index 7414c07326..6326f519f1 100644 --- a/unsloth/models/llama.py +++ b/unsloth/models/llama.py @@ -25,7 +25,7 @@ from torch.nn.functional import scaled_dot_product_attention from transformers import __version__ as transformers_version from unsloth_zoo.utils import Version, _get_dtype -from unsloth_zoo.hf_utils import dtype_from_config, add_dtype_kwargs +from unsloth_zoo.hf_utils import dtype_from_config, add_dtype_kwargs, fix_lora_auto_mapping from unsloth_zoo.peft_utils import SKIP_QUANTIZATION_MODULES from unsloth import DEVICE_TYPE, DEVICE_COUNT @@ -2632,6 +2632,8 @@ def get_peft_model( pass model = _get_peft_model(model, lora_config) + # Fix LoraConfig.auto_mapping is None + fix_lora_auto_mapping(model) # Apply QAT + LoRA if specified if qat_scheme is not None: diff --git a/unsloth/models/vision.py b/unsloth/models/vision.py index d6c710c281..d03ffb45a9 100644 --- a/unsloth/models/vision.py +++ b/unsloth/models/vision.py @@ -43,7 +43,7 @@ from transformers import __version__ as transformers_version from triton import __version__ as triton_version from unsloth_zoo.utils import _get_dtype -from unsloth_zoo.hf_utils import dtype_from_config, add_dtype_kwargs +from unsloth_zoo.hf_utils import dtype_from_config, add_dtype_kwargs, fix_lora_auto_mapping from unsloth_zoo.patching_utils import patch_model_and_tokenizer from unsloth_zoo.training_utils import prepare_model_for_training @@ -758,6 +758,8 @@ def get_peft_model( use_gradient_checkpointing = use_gradient_checkpointing, ) model = _get_peft_model(model, lora_config) + # Fix LoraConfig.auto_mapping is None + fix_lora_auto_mapping(model) # Enable gradients on modules which are trainable requires_grad_for_gradient_checkpointing(model) trust_remote_code = getattr(model, "_unsloth_trust_remote_code", False)