Skip to content
Merged

Nightly #3148

Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 3 additions & 3 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ name = "unsloth"
dynamic = ["version"]
description = "2-5X faster LLM finetuning"
readme = "README.md"
requires-python = ">=3.9,<3.13"
requires-python = ">=3.9,<=3.13"
license = {text = "Apache-2.0"}
keywords = ["ai", "llm",]
authors = [
Expand Down Expand Up @@ -37,7 +37,7 @@ triton = [
]

huggingface = [
"unsloth_zoo>=2025.8.3",
"unsloth_zoo>=2025.8.4",
"packaging",
"tyro",
"transformers>=4.51.3,!=4.47.0,!=4.52.0,!=4.52.1,!=4.52.2,!=4.52.3,!=4.53.0",
Expand Down Expand Up @@ -384,7 +384,7 @@ colab-ampere-torch220 = [
"flash-attn>=2.6.3",
]
colab-new = [
"unsloth_zoo>=2025.8.3",
"unsloth_zoo>=2025.8.4",
"packaging",
"tyro",
"transformers>=4.51.3,!=4.47.0,!=4.52.0,!=4.52.1,!=4.52.2,!=4.52.3,!=4.53.0",
Expand Down
50 changes: 49 additions & 1 deletion unsloth/models/_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@
# See the License for the specific language governing permissions and
# limitations under the License.

__version__ = "2025.8.4"
__version__ = "2025.8.5"

__all__ = [
"SUPPORTS_BFLOAT16",
Expand Down Expand Up @@ -58,6 +58,7 @@
"HAS_CUT_CROSS_ENTROPY",
"EMPTY_LOGITS",
"fused_linear_cross_entropy",
"unsloth_fused_ce_loss",
"patch_unsloth_smart_gradient_checkpointing",
"unpatch_unsloth_smart_gradient_checkpointing",

Expand Down Expand Up @@ -109,6 +110,7 @@
HAS_CUT_CROSS_ENTROPY,
fused_linear_cross_entropy,
_unsloth_get_batch_samples,
unsloth_fused_ce_loss,
)
from unsloth_zoo.vision_utils import (
process_vision_info,
Expand Down Expand Up @@ -152,6 +154,41 @@ def __init__(self, text): self.text = text
def filter(self, x): return not (self.text in x.getMessage())
pass

# Stop vLLM messages
# vLLM emits chatty INFO logs for sleep-mode memory frees and prefix-cache
# resets. Unless the user explicitly opted into Unsloth logging, attach
# HideLoggingMessage filters that drop those specific messages. Each import
# is best-effort: vLLM may not be installed, and its internal module layout
# changes between versions, so failures are silently ignored.
if os.environ.get('UNSLOTH_ENABLE_LOGGING', '0') != '1':
    # (dotted module path, substrings of log messages to hide)
    for _vllm_module_path, _hidden_texts in (
        ("vllm.worker.worker",                   ("Sleep mode freed",)),
        ("vllm.v1.worker.gpu_worker",            ("Sleep mode freed",)),
        ("vllm.executor.executor_base",          ("to fall asleep", "to wake up",)),
        ("vllm.core.block.prefix_caching_block", ("reset prefix cache",)),
        ("vllm.v1.core.block_pool",              ("reset prefix cache",)),
    ):
        try:
            # __import__ with a non-empty fromlist returns the leaf module,
            # so its module-level `logger` is reachable directly.
            _vllm_logger = __import__(_vllm_module_path, fromlist = ["logger"]).logger
            for _text in _hidden_texts:
                _vllm_logger.addFilter(HideLoggingMessage(_text))
            del _vllm_logger
        except Exception:
            # Narrowed from a bare `except:` so KeyboardInterrupt and
            # SystemExit are not swallowed during import.
            pass
    pass
pass

# The speedups for torchdynamo mostly come with GPU Ampere or higher and which is not detected here.
from transformers.training_args import logger as transformers_training_args_logger
transformers_training_args_logger.addFilter(HideLoggingMessage("The speedups"))
Expand Down Expand Up @@ -224,6 +261,17 @@ def filter(self, x): return not (self.text in x.getMessage())
except:
pass

# You passed `quantization_config` or equivalent parameters
try:
warnings.filterwarnings(
action = "ignore",
message = r".*quantization_config.*",
category = UserWarning,
append = True,
)
except:
pass

# Errors out on
# Some weights of Gemma3nForConditionalGeneration were not initialized from the model checkpoint
from transformers.modeling_utils import logger as transformers_logger
Expand Down
25 changes: 19 additions & 6 deletions unsloth/models/llama.py
Original file line number Diff line number Diff line change
Expand Up @@ -1197,12 +1197,25 @@ def _CausalLM_fast_forward(
if self.config.model_type == "falcon_h1":
hidden_states = hidden_states * self.config.lm_head_multiplier

loss = fused_linear_cross_entropy(
hidden_states = hidden_states,
lm_weight = lm_head,
labels = labels,
num_items_in_batch = n_items,
logit_softcapping = logit_softcapping,
# loss = fused_linear_cross_entropy(
# hidden_states = hidden_states,
# lm_weight = lm_head,
# labels = labels,
# num_items_in_batch = n_items,
# logit_softcapping = logit_softcapping,
# )
loss = unsloth_fused_ce_loss(
trainer = None,
hidden_states = hidden_states,
lm_head_weight = lm_head,
lm_head_bias = None,
labels = labels,
mask = None,
n_items = n_items,
scaling = getattr(self, "accelerator_scaler", None),
target_gb = 1,
torch_compile = True,
logit_softcapping = logit_softcapping,
)
if not return_dict:
output = (logits,) + outputs[1:]
Expand Down
21 changes: 19 additions & 2 deletions unsloth/models/loader.py
Original file line number Diff line number Diff line change
Expand Up @@ -111,6 +111,14 @@ def from_pretrained(
disable_log_stats = True,
*args, **kwargs,
):
# Login to allow private models
if token is None: token = get_token()
if token is not None:
try:
from huggingface_hub import login
login(token = token)
except:
pass
if load_in_8bit or full_finetuning:
return FastModel.from_pretrained(
model_name = model_name,
Expand Down Expand Up @@ -513,6 +521,13 @@ def from_pretrained(
*args, **kwargs,
):
if token is None: token = get_token()
# Login to allow private models
if token is not None:
try:
from huggingface_hub import login
login(token = token)
except:
pass
if whisper_language is not None: assert(type(whisper_language) is str)
if whisper_task is not None: assert(type(whisper_task) is str)
SUPPORTS_BFLOAT16 = is_bfloat16_supported()
Expand Down Expand Up @@ -587,10 +602,12 @@ def from_pretrained(
if transformers_version < Version("4.53.0"):
raise RuntimeError("Unsloth: Gemma 3N only works on transformers >= 4.53.0" + LATEST)
elif "falcon-h1" in lowered_model_name:
# Falcon must use float32 Triton ie TRITON_F32_DEFAULT = 'ieee'
# since Mamba kernels error out on using lower precision
os.environ["UNSLOTH_FORCE_CUSTOM_DTYPE"] = \
"float16;torch.float32;torch.float16;"\
"if name.endswith(('q_proj', 'k_proj', 'v_proj', 'o_proj', 'gate_proj', 'up_proj', 'down_proj', 'head')): module.to(torch.float16); "\
"os.environ['TRITON_F32_DEFAULT'] = 'ieee';"
"if name.endswith(('q_proj', 'k_proj', 'v_proj', 'o_proj', 'gate_proj', 'up_proj', 'down_proj', 'head')): module.to(torch.float16);"\
"os.environ['TRITON_F32_DEFAULT'] = 'ieee'"
elif "gpt-oss" in lowered_model_name:
os.environ["UNSLOTH_DISABLE_STATIC_GENERATION"] = "1"
# CCE fails on Tesla T4
Expand Down
14 changes: 14 additions & 0 deletions unsloth/models/rl.py
Original file line number Diff line number Diff line change
Expand Up @@ -421,6 +421,20 @@ def _patch_trl_rl_trainers(trainer_file = "grpo_trainer"):
RLTrainer_post += neftune_check
pass

# Add accelerator scaler to model
if "model" in call_args:
neftune_check = \
"if hasattr(self, 'accelerator'):\n"\
" scaler = self.accelerator.scaler\n"\
" current_model = model\n"\
" while hasattr(current_model, 'model'):\n"\
" current_model.accelerator_scaler = scaler\n"\
" current_model = current_model.model\n"\
" current_model.accelerator_scaler = scaler\n"\
"pass\n"
RLTrainer_post += neftune_check
pass

# Edit optional metrics
other_metrics_processor = ""
if trainer_file in RL_METRICS_CHANGES:
Expand Down