Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
48 commits
Select commit Hold shift + click to select a range
c39f56f
Fix mamba
danielhanchen Aug 9, 2025
4bd35c5
Update loader.py
danielhanchen Aug 9, 2025
1f0a4c3
Update vision.py
danielhanchen Aug 9, 2025
3cb9719
Update loader.py
danielhanchen Aug 9, 2025
a4081af
Merge branch 'main' into nightly
danielhanchen Aug 13, 2025
1432eac
Filter vLLM standby logs (#3131)
Datta0 Aug 13, 2025
fd1124a
Update loader.py
danielhanchen Aug 13, 2025
b78189b
Add scaler
danielhanchen Aug 13, 2025
cd2e284
Update llama.py
danielhanchen Aug 13, 2025
5e976a5
Update _utils.py
danielhanchen Aug 13, 2025
f451adf
Versioning
danielhanchen Aug 13, 2025
dafc7b8
Merge branch 'main' into nightly
danielhanchen Aug 13, 2025
bf5c402
Merge branch 'main' into nightly
danielhanchen Aug 13, 2025
3b82c42
GPT OSS fix
danielhanchen Aug 14, 2025
61366ef
GPT OSS fix
danielhanchen Aug 14, 2025
de043d9
Update loader.py
danielhanchen Aug 14, 2025
c1ef6f1
Update vision.py
danielhanchen Aug 14, 2025
f18cd26
Update vision.py
danielhanchen Aug 14, 2025
0215224
Update loader.py
danielhanchen Aug 14, 2025
5ed4a46
Update vision.py
danielhanchen Aug 15, 2025
e2ebb99
Merge branch 'main' into nightly
danielhanchen Aug 15, 2025
a222558
Update vision.py
danielhanchen Aug 15, 2025
cdcfe7d
Merge branch 'main' into nightly
danielhanchen Aug 15, 2025
6cffb1c
Update llama.py
danielhanchen Aug 15, 2025
15d33a5
Update llama.py
danielhanchen Aug 15, 2025
95a4daf
Update llama.py
danielhanchen Aug 15, 2025
4104bba
Versioning
danielhanchen Aug 15, 2025
8cc1999
Update mapper.py
danielhanchen Aug 15, 2025
a5dffd7
Merge branch 'main' into nightly
danielhanchen Aug 16, 2025
ffda8a7
Update vision.py
danielhanchen Aug 16, 2025
cdf2e17
Update vision.py
danielhanchen Aug 16, 2025
941d1ae
Update vision.py
danielhanchen Aug 16, 2025
73fa72c
Upcast norms
danielhanchen Aug 16, 2025
e4bbeef
Update loader.py
danielhanchen Aug 16, 2025
c8d00be
Update vision.py
danielhanchen Aug 16, 2025
564b6f8
Upcast layernorms
danielhanchen Aug 17, 2025
b8a34b4
Update llama.py
danielhanchen Aug 17, 2025
509fcb5
Update llama.py
danielhanchen Aug 17, 2025
27f1a2e
Update llama.py
danielhanchen Aug 18, 2025
931851a
Update llama.py
danielhanchen Aug 18, 2025
3b9057b
Update llama.py
danielhanchen Aug 18, 2025
3dd87bb
Update llama.py
danielhanchen Aug 18, 2025
f3f2b51
Merge branch 'main' into nightly
danielhanchen Aug 18, 2025
b757faf
Update save.py
danielhanchen Aug 18, 2025
2e86333
Update rl.py
danielhanchen Aug 18, 2025
b01e948
Update pyproject.toml
danielhanchen Aug 18, 2025
b064255
Merge branch 'main' into nightly
danielhanchen Aug 18, 2025
a751fd7
Update rl.py
danielhanchen Aug 18, 2025
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@ triton = [
]

huggingface = [
"unsloth_zoo>=2025.8.5",
"unsloth_zoo>=2025.8.6",
"packaging",
"tyro",
"transformers>=4.51.3,!=4.47.0,!=4.52.0,!=4.52.1,!=4.52.2,!=4.52.3,!=4.53.0",
Expand Down Expand Up @@ -384,7 +384,7 @@ colab-ampere-torch220 = [
"flash-attn>=2.6.3",
]
colab-new = [
"unsloth_zoo>=2025.8.5",
"unsloth_zoo>=2025.8.6",
"packaging",
"tyro",
"transformers>=4.51.3,!=4.47.0,!=4.52.0,!=4.52.1,!=4.52.2,!=4.52.3,!=4.53.0",
Expand Down
37 changes: 26 additions & 11 deletions unsloth/models/loader.py
Original file line number Diff line number Diff line change
Expand Up @@ -571,8 +571,11 @@ def from_pretrained(
elif "qwen2.5" in lowered_model_name and transformers_version < Version("4.49.0"):
raise RuntimeError("Unsloth: Qwen 2.5 only works on transformers >= 4.49.0." + LATEST)
# Gemma 3
elif "gemma-3" in lowered_model_name and transformers_version < Version("4.50.0.dev0"):
raise RuntimeError("Unsloth: Gemma 3 only works on transformers >= 4.50.0." + NIGHTLY)
elif "gemma-3" in lowered_model_name:
if transformers_version < Version("4.50.0.dev0"):
raise RuntimeError("Unsloth: Gemma 3 only works on transformers >= 4.50.0." + NIGHTLY)
# Set norms to float32 since anyways they get upcasted to float32
os.environ["UNSLOTH_HIGH_PRECISION_LAYERNORM"] = "1"
# Cohere
elif "c4ai-command-a-03-2025" in lowered_model_name and transformers_version < Version("4.50.0.dev0"):
raise RuntimeError("Unsloth: Cohere's Command model only works on transformers >= 4.50.0." + NIGHTLY)
Expand All @@ -582,31 +585,36 @@ def from_pretrained(
os.environ["UNSLOTH_DISABLE_STATIC_GENERATION"] = "1" # Sesame fails
os.environ["UNSLOTH_FORCE_CUSTOM_DTYPE"] = \
"all;torch.float32;torch.float16;"\
"if name.endswith(('_proj', 'fc1', 'fc2', 'codebook', 'head')): module.to(torch.float16);"
"if name.endswith(('_proj', 'fc1', 'fc2', 'codebook', 'head')): module.to(torch.float16)"\
";"
# Granite 4
elif 'granite-4' in lowered_model_name:
# granite-4 rms norms are stored as 16 bit, but we upcast
os.environ["UNSLOTH_UPCAST_LAYERNORM"] = "1"
# Granite-4 rms norms are stored as 16 bit, but we upcast
os.environ["UNSLOTH_HIGH_PRECISION_LAYERNORM"] = "1"
os.environ["UNSLOTH_DISABLE_STATIC_GENERATION"] = "1"
# Olmo 2
elif "olmo-2" in lowered_model_name and transformers_version < Version("4.50.0.dev0"):
raise RuntimeError("Unsloth: OLMo-2 only works on transformers >= 4.50.0." + NIGHTLY)
# Gemma 3N
elif "gemma-3n" in lowered_model_name:
if transformers_version < Version("4.53.0"):
raise RuntimeError("Unsloth: Gemma 3N only works on transformers >= 4.53.0" + LATEST)
os.environ["UNSLOTH_DISABLE_STATIC_GENERATION"] = "1"
os.environ["UNSLOTH_FORCE_CUSTOM_DTYPE"] = \
"float16;torch.float16;torch.float16;"\
"if name.endswith(('.conv')): module;"\
"if name.endswith('norm'): "\
"module._pre_set_compute_dtype = torch.float32\n"\
";"\
"from unsloth_zoo.temporary_patches.gemma3n import patch_Gemma3nConvNormAct_forward; patch_Gemma3nConvNormAct_forward()"

if transformers_version < Version("4.53.0"):
raise RuntimeError("Unsloth: Gemma 3N only works on transformers >= 4.53.0" + LATEST)
# Set norms to float32 since anyways they get upcasted to float32
os.environ["UNSLOTH_HIGH_PRECISION_LAYERNORM"] = "1"
elif "falcon-h1" in lowered_model_name:
# Falcon must use float32 Triton ie TRITON_F32_DEFAULT = 'ieee'
# since Mamba kernels error out on using lower precision
os.environ["UNSLOTH_FORCE_CUSTOM_DTYPE"] = \
"float16;torch.float32;torch.float16;"\
"if name.endswith(('q_proj', 'k_proj', 'v_proj', 'o_proj', 'gate_proj', 'up_proj', 'down_proj', 'head')): module.to(torch.float16);"\
"if name.endswith(('q_proj', 'k_proj', 'v_proj', 'o_proj', 'gate_proj', 'up_proj', 'down_proj', 'head')): module.to(torch.float16)"\
";"\
"os.environ['TRITON_F32_DEFAULT'] = 'ieee'"
elif "gpt-oss" in lowered_model_name:
os.environ["UNSLOTH_DISABLE_STATIC_GENERATION"] = "1"
Expand All @@ -615,23 +623,30 @@ def from_pretrained(
os.environ["UNSLOTH_ENABLE_CCE"] = "0"
if not load_in_4bit:
# Only upcast MoE biases for MXFP4, not BnB
# Set norms to float32 since anyways they get upcasted to float32
os.environ["UNSLOTH_FORCE_CUSTOM_DTYPE"] = \
"all;None;None;"\
"x = 'gate_up_proj_bias'\n"\
"if hasattr(module, x): "\
"setattr(module, x, torch.nn.Parameter(getattr(module, x).to(torch.float32)) if isinstance(getattr(module, x), torch.nn.Parameter) else getattr(module, x).to(torch.float32))\n"\
""\
"x = 'down_proj_bias'\n"\
"if hasattr(module, x): "\
"setattr(module, x, torch.nn.Parameter(getattr(module, x).to(torch.float32)) if isinstance(getattr(module, x), torch.nn.Parameter) else getattr(module, x).to(torch.float32))\n"\
""\
";"
else:
# Set down projection compute dtype to be float32 for float16 machines
# Set norms to float32 since anyways they get upcasted to float32
os.environ["UNSLOTH_FORCE_CUSTOM_DTYPE"] = \
"all;None;None;"\
"if 'down_projs' in name and hasattr(module, 'compute_dtype') and "\
"if 'down_projs' in name and hasattr(module, 'weight') and "\
"torch.amax(dequantize_module_weight(module)) >= 1024:"\
"module._pre_set_compute_dtype = torch.float32\n"\
""\
";"
# Set norms to float32 since anyways they get upcasted to float32
os.environ["UNSLOTH_HIGH_PRECISION_LAYERNORM"] = "1"
else:
for check_model_name in DISABLE_COMPILE_MODEL_NAMES:
if check_model_name in lowered_model_name:
Expand Down
2 changes: 2 additions & 0 deletions unsloth/models/rl.py
Original file line number Diff line number Diff line change
Expand Up @@ -487,6 +487,8 @@ def _patch_trl_rl_trainers(trainer_file = "grpo_trainer"):
"logging_steps" : 1,
"max_seq_length" : None,
"num_generations" : 8,
# "steps_per_generation" : 1, # Otherwise defaults to ga_steps which is wrong
# "generation_batch_size" : None, # Useless. If steps_per_generation set, generation_batch_size clashes
"top_k" : None,
"vllm_mode" : "colocate",
"generation_kwargs" : {},
Expand Down
6 changes: 6 additions & 0 deletions unsloth/models/vision.py
Original file line number Diff line number Diff line change
Expand Up @@ -455,6 +455,12 @@ def from_pretrained(
# Return old flag
os.environ["HF_HUB_ENABLE_HF_TRANSFER"] = old_hf_transfer

# Check float32 norm weights
if os.environ.get("UNSLOTH_HIGH_PRECISION_LAYERNORM", "0") == "1":
for jj, (name, module) in enumerate(model.named_modules()):
if name.endswith("norm") and hasattr(module, "weight"):
module._pre_set_compute_dtype = torch.float32
pass
# Edit data-types
if custom_datatype is not None:
with torch.no_grad():
Expand Down
41 changes: 37 additions & 4 deletions unsloth/save.py
Original file line number Diff line number Diff line change
Expand Up @@ -1195,6 +1195,41 @@ def save_to_gguf(
f"--outfile {final_location} --vocab-type {vocab_type} "\
f"--outtype {first_conversion} --concurrency {n_cpus} --pad-vocab"
else:
# Fix up conversion script if possible
with open(convert_location, "rb") as f: converter_latest = f.read()
# Fix metadata
converter_latest = re.sub(
rb"(self\.metadata \= .+?\(.+?\)"\
rb"[\n]{1,}([\s]{4,}))",
rb"\1"\
rb"if hasattr(self.metadata, 'quantized_by'): self.metadata.quantized_by = 'Unsloth'\n"\
rb"\2if hasattr(self.metadata, 'repo_url'): self.metadata.repo_url = 'https://huggingface.co/unsloth'\n"\
rb"\2if hasattr(self.metadata, 'tags'): self.metadata.tags = ['unsloth', 'llama.cpp']\n"\
rb"\2",
converter_latest,
)

# Make mistral_common optional for now
# from x import y
converter_latest = re.sub(
rb"(from mistral_common[^\n\(]{1,})[\s]{0,}\n",
rb"try:\n \1\nexcept:\n pass\n",
converter_latest,
)
# from x import (y, z,)
converter_latest = re.sub(
rb"(from mistral_common[^\n\(]{1,}[\s]{0,}\(.+?\))",
rb"try:\n \1\nexcept:\n pass\n",
converter_latest,
flags = re.MULTILINE | re.DOTALL,
)

try:
# Write file
with open(convert_location, "wb") as file:
file.write(converter_latest)
except:
pass
command = f"python {convert_location} {model_directory} "\
f"--outfile {final_location} "\
f"--outtype {first_conversion}"
Expand Down Expand Up @@ -1694,7 +1729,7 @@ def push_to_ollama_hub(username: str, model_name: str, tag: str):
print(f"\nMODEL PUBLISHED FAILED WITH RETURN CODE {return_code}")
else:
print("\nMODEL PUBLISHED SUCCESSFULLY")

pass

def push_to_ollama(
tokenizer,
Expand Down Expand Up @@ -1726,9 +1761,7 @@ def push_to_ollama(
)

print("Successfully pushed to ollama")



pass


def unsloth_save_pretrained_gguf(
Expand Down