Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
17 changes: 11 additions & 6 deletions unsloth/device_type.py
Original file line number Diff line number Diff line change
Expand Up @@ -79,16 +79,19 @@ def get_device_count():
DEVICE_COUNT: int = get_device_count()

# 4-bit quantization requires a block size of 64
# this is not supported on AMD Instinct GPUs currently
# | Device Type | Warp Size | Block Size |
# |-----------------|-----------|------------|
# | CUDA | 32 | 64 |
# | Radeon (Navi) | 32 | 64 |
# | Instinct (MI) | 64 | 128 |
# | CUDA | 32 | 32 |
# | Radeon (Navi) | 32 | 32 |
# | Instinct (MI) | 64 | 32 |
#
# Since bitsandbytes 0.49.0, pre-quantized models with blocksize 64 now work
# on Radeon GPUs, but not Instinct MI300x for eg [WIP]
# on Radeon GPUs, but not Instinct MI300x for eg
# See https://github.com/bitsandbytes-foundation/bitsandbytes/pull/1748
#
# Since bitsandbytes 0.49.2, blocksize=64 4-bit quantization is supported on
# CDNA (MI Instinct / gfx9xx) GPUs as well
# See https://github.com/bitsandbytes-foundation/bitsandbytes/pull/1856

ALLOW_PREQUANTIZED_MODELS: bool = True
# HSA_STATUS_ERROR_EXCEPTION checks - sometimes AMD fails for BnB
Expand All @@ -104,7 +107,9 @@ def get_device_count():
ALLOW_BITSANDBYTES = False
if ALLOW_BITSANDBYTES:
ALLOW_BITSANDBYTES = Version(bitsandbytes.__version__) > Version("0.48.2.dev0")
if Version(bitsandbytes.__version__) > Version("0.49.0"):
if Version(bitsandbytes.__version__) >= Version("0.49.2"):
pass
Comment on lines +110 to +111
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

P2 Badge Preserve HIP extension probe for bitsandbytes >=0.49.2

This branch now just executes `pass` for HIP when `bitsandbytes>=0.49.2`, which skips the guarded `bitsandbytes.cextension` import that previously caught broken ROCm installs (the same HSA failure mode noted in the comments). In that scenario, `ALLOW_BITSANDBYTES` remains `True`, so the loader still enables 4-bit paths and fails later at runtime instead of falling back safely. Please keep an explicit health check in this branch so invalid HIP/bitsandbytes setups are disabled early.

Useful? React with 👍 / 👎.

elif Version(bitsandbytes.__version__) >= Version("0.49.0"):
try:
# Pre-quantized bitsandbytes models use blocksize 64, so we need to check the GPU
from bitsandbytes.cextension import ROCM_WARP_SIZE_64
Expand Down
8 changes: 4 additions & 4 deletions unsloth/models/loader.py
Original file line number Diff line number Diff line change
Expand Up @@ -397,7 +397,7 @@ def from_pretrained(
load_in_fp8 = False

# Check if pre-quantized models are allowed
# For eg AMD Instinct GPUs need blocksize = 128, but our pre-quants are blocksize = 64
# AMD Instinct GPUs need blocksize = 128 on bitsandbytes < 0.49.2 (our pre-quants use blocksize = 64)
if not ALLOW_PREQUANTIZED_MODELS and model_name.lower().endswith(
("-unsloth-bnb-4bit", "-bnb-4bit")
):
Expand Down Expand Up @@ -537,7 +537,7 @@ def from_pretrained(
trust_remote_code = trust_remote_code,
)
# Check if pre-quantized models are allowed
# For eg AMD Instinct GPUs need blocksize = 128, but our pre-quants are blocksize = 64
# AMD Instinct GPUs need blocksize = 128 on bitsandbytes < 0.49.2 (our pre-quants use blocksize = 64)
if not ALLOW_PREQUANTIZED_MODELS and model_name.lower().endswith(
("-unsloth-bnb-4bit", "-bnb-4bit")
):
Expand Down Expand Up @@ -1005,7 +1005,7 @@ def from_pretrained(
load_in_fp8 = False

# Check if pre-quantized models are allowed
# For eg AMD Instinct GPUs need blocksize = 128, but our pre-quants are blocksize = 64
# AMD Instinct GPUs need blocksize = 128 on bitsandbytes < 0.49.2 (our pre-quants use blocksize = 64)
if not ALLOW_PREQUANTIZED_MODELS and model_name.lower().endswith(
("-unsloth-bnb-4bit", "-bnb-4bit")
):
Expand Down Expand Up @@ -1288,7 +1288,7 @@ def from_pretrained(
if not use_exact_model_name:
model_name = get_model_name(model_name, load_in_4bit)
# Check if pre-quantized models are allowed
# For eg AMD Instinct GPUs need blocksize = 128, but our pre-quants are blocksize = 64
# AMD Instinct GPUs need blocksize = 128 on bitsandbytes < 0.49.2 (our pre-quants use blocksize = 64)
if not ALLOW_PREQUANTIZED_MODELS and model_name.lower().endswith(
("-unsloth-bnb-4bit", "-bnb-4bit")
):
Expand Down