diff --git a/unsloth/models/vision.py b/unsloth/models/vision.py index 30d381c791..31901b1570 100644 --- a/unsloth/models/vision.py +++ b/unsloth/models/vision.py @@ -654,6 +654,8 @@ def from_pretrained( raise_handler = RaiseUninitialized() if not fast_inference: + # Prevent load_in_fp8 from being forwarded into HF internal model loading + load_in_fp8 = kwargs.pop("load_in_fp8", None) model = auto_model.from_pretrained( model_name, device_map = device_map,