Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
52 changes: 40 additions & 12 deletions unsloth/device_type.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,6 @@
import torch
import functools
from unsloth_zoo.utils import Version
import inspect


@functools.cache
Expand Down Expand Up @@ -78,21 +77,50 @@ def get_device_count():

DEVICE_COUNT: int = get_device_count()

# Check blocksize for 4bit -> 64 for CUDA, 128 for AMD
# If AMD, we cannot load pre-quantized models for now :(
# 4-bit quantization requires a block size of 64;
# this is not currently supported on AMD Instinct GPUs.
# | Device Type | Warp Size | Block Size |
# |-----------------|-----------|------------|
# | CUDA | 32 | 64 |
# | Radeon (Navi) | 32 | 64 |
# | Instinct (MI) | 64 | 128 |
#
# Since bitsandbytes 0.49.0, pre-quantized models with blocksize 64 now work
# on Radeon GPUs, but not yet on Instinct GPUs (e.g. MI300X) [WIP]
# See https://github.com/bitsandbytes-foundation/bitsandbytes/pull/1748

ALLOW_PREQUANTIZED_MODELS: bool = True
# HSA_STATUS_ERROR_EXCEPTION checks - sometimes AMD fails for BnB
ALLOW_BITSANDBYTES: bool = True
if DEVICE_TYPE == "hip":
try:
from bitsandbytes.nn.modules import Params4bit

if "blocksize = 64 if not HIP_ENVIRONMENT else 128" in inspect.getsource(
Params4bit
):
ALLOW_PREQUANTIZED_MODELS = False
import bitsandbytes

ALLOW_BITSANDBYTES = Version(bitsandbytes.__version__) > Version("0.48.2.dev0")
except:
pass
print(
"Unsloth: `bitsandbytes` is not installed - 4bit QLoRA unallowed, but 16bit and full finetuning works."
)
ALLOW_PREQUANTIZED_MODELS = False
ALLOW_BITSANDBYTES = False
if ALLOW_BITSANDBYTES:
ALLOW_BITSANDBYTES = Version(bitsandbytes.__version__) > Version("0.48.2.dev0")
if Version(bitsandbytes.__version__) > Version("0.49.0"):
try:
# Pre-quantized bitsandbytes models use blocksize 64, so we need to check the GPU
from bitsandbytes.cextension import ROCM_WARP_SIZE_64

ALLOW_PREQUANTIZED_MODELS = not ROCM_WARP_SIZE_64
except Exception as e:
print(
"Unsloth: Checking `from bitsandbytes.cextension import ROCM_WARP_SIZE_64` had error = \n"
f"{str(e)}\n"
"4bit QLoRA disabled for now, but 16bit and full finetuning works."
)
ALLOW_PREQUANTIZED_MODELS = False
ALLOW_BITSANDBYTES = False
elif ALLOW_BITSANDBYTES:
from bitsandbytes.nn.modules import Params4bit

if "blocksize = 64 if not HIP_ENVIRONMENT else 128" in inspect.getsource(
Params4bit
):
ALLOW_PREQUANTIZED_MODELS = False
8 changes: 4 additions & 4 deletions unsloth/models/loader.py
Original file line number Diff line number Diff line change
Expand Up @@ -249,7 +249,7 @@ def from_pretrained(
model_name = new_model_name

# Check if pre-quantized models are allowed
# For eg AMD GPUs need blocksize = 128, but our pre-quants are blocksize = 64
# E.g. AMD Instinct GPUs need blocksize = 128, but our pre-quants use blocksize = 64
if not ALLOW_PREQUANTIZED_MODELS and model_name.lower().endswith(
("-unsloth-bnb-4bit", "-bnb-4bit")
):
Expand Down Expand Up @@ -383,7 +383,7 @@ def from_pretrained(
if not use_exact_model_name:
model_name = get_model_name(model_name, load_in_4bit)
# Check if pre-quantized models are allowed
# For eg AMD GPUs need blocksize = 128, but our pre-quants are blocksize = 64
# E.g. AMD Instinct GPUs need blocksize = 128, but our pre-quants use blocksize = 64
if not ALLOW_PREQUANTIZED_MODELS and model_name.lower().endswith(
("-unsloth-bnb-4bit", "-bnb-4bit")
):
Expand Down Expand Up @@ -790,7 +790,7 @@ def from_pretrained(
model_name = new_model_name

# Check if pre-quantized models are allowed
# For eg AMD GPUs need blocksize = 128, but our pre-quants are blocksize = 64
# E.g. AMD Instinct GPUs need blocksize = 128, but our pre-quants use blocksize = 64
if not ALLOW_PREQUANTIZED_MODELS and model_name.lower().endswith(
("-unsloth-bnb-4bit", "-bnb-4bit")
):
Expand Down Expand Up @@ -1056,7 +1056,7 @@ def from_pretrained(
if not use_exact_model_name:
model_name = get_model_name(model_name, load_in_4bit)
# Check if pre-quantized models are allowed
# For eg AMD GPUs need blocksize = 128, but our pre-quants are blocksize = 64
# E.g. AMD Instinct GPUs need blocksize = 128, but our pre-quants use blocksize = 64
if not ALLOW_PREQUANTIZED_MODELS and model_name.lower().endswith(
("-unsloth-bnb-4bit", "-bnb-4bit")
):
Expand Down