3 changes: 1 addition & 2 deletions unsloth/__init__.py
@@ -173,6 +173,7 @@ def is_bf16_supported(): return SUPPORTS_BFLOAT16
 # For Gradio HF Spaces?
 # if "SPACE_AUTHOR_NAME" not in os.environ and "SPACE_REPO_NAME" not in os.environ:
 import triton
+import bitsandbytes as bnb
 if DEVICE_TYPE == "cuda":
     libcuda_dirs = lambda: None
     if Version(triton.__version__) >= Version("3.0.0"):
@@ -181,7 +182,6 @@ def is_bf16_supported(): return SUPPORTS_BFLOAT16
     else: from triton.common.build import libcuda_dirs
 
     # Try loading bitsandbytes and triton
-    import bitsandbytes as bnb
     try:
         cdequantize_blockwise_fp32 = bnb.functional.lib.cdequantize_blockwise_fp32
         libcuda_dirs()
@@ -233,7 +233,6 @@ def is_bf16_supported(): return SUPPORTS_BFLOAT16
     # NO-OP for rocm device
     pass
 elif DEVICE_TYPE == "xpu":
-    # currently intel xpu will not support bnb, will add support in the future
     # TODO: check triton for intel installed properly.
     pass
 
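Taken together, these three hunks hoist the bitsandbytes import to module scope, so every device branch sees the same module object, and drop the stale comment about XPU being unsupported. A minimal sketch of the resulting pattern; DEVICE_TYPE here is a stand-in for unsloth's device detection earlier in the file, and the error message is illustrative, not unsloth's actual wording:

import triton
import bitsandbytes as bnb  # imported unconditionally, before any device branch

DEVICE_TYPE = "cuda"  # stand-in; the real value comes from unsloth's device detection

if DEVICE_TYPE == "cuda":
    # Binding one symbol from the compiled library is enough to surface a
    # broken bitsandbytes install early.
    try:
        cdequantize_blockwise_fp32 = bnb.functional.lib.cdequantize_blockwise_fp32
    except AttributeError as e:
        raise ImportError("bitsandbytes CUDA kernels failed to load") from e
elif DEVICE_TYPE == "xpu":
    # No stub needed: the import above already succeeded on XPU.
    pass
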
34 changes: 8 additions & 26 deletions unsloth/kernels/utils.py
@@ -75,18 +75,19 @@ def calculate_settings(n : int) -> (int, int,):
 pass
 
 HAS_CUDA_STREAM = False
+import bitsandbytes as bnb
+get_ptr = bnb.functional.get_ptr
 # INTEL GPU specific logic
 if DEVICE_TYPE == "xpu":
-    # TODO: Changed here after adding XPU BNB support
     HAS_XPU_STREAM = True
-    def get_ptr(x: Optional[torch.Tensor]):
-        raise RuntimeError("XPU BNB support is not implemented yet. This function should not be called.")
-
 else:
-    # NVIDIA-GPU logic here as default
-    import bitsandbytes as bnb
     # https://github.com/bitsandbytes-foundation/bitsandbytes/pull/1330/files
     HAS_CUDA_STREAM = Version(bnb.__version__) > Version("0.43.3")
-    get_ptr = bnb.functional.get_ptr
 
 
 if DEVICE_COUNT > 1:
@@ -148,31 +149,12 @@ def _get_tensor_stream(tensor: torch_Tensor) -> c_void_p:
 # Bitsandbytes operations
 ctypes_c_int = ctypes.c_int
 ctypes_c_int32 = ctypes.c_int32
-# INTEL GPU Specific Logic
-if DEVICE_TYPE == "xpu":
-    # TODO: After adding XPU BNB support, this function should be implemented
-    def cdequantize_blockwise_fp32(*args, **kwargs):
-        raise RuntimeError("XPU BNB support is not implemented yet. cdequantize_blockwise_fp32 should not be called now.")
-
-    def cdequantize_blockwise_fp16_nf4(*args, **kwargs):
-        raise RuntimeError("XPU BNB support is not implemented yet. cdequantize_blockwise_fp16_nf4 should not be called now.")
-
-    def cdequantize_blockwise_bf16_nf4(*args, **kwargs):
-        raise RuntimeError("XPU BNB support is not implemented yet. cdequantize_blockwise_bf16_nf4 should not be called now.")
+cdequantize_blockwise_fp32 = bnb.functional.lib.cdequantize_blockwise_fp32
+cdequantize_blockwise_fp16_nf4 = bnb.functional.lib.cdequantize_blockwise_fp16_nf4
+cdequantize_blockwise_bf16_nf4 = bnb.functional.lib.cdequantize_blockwise_bf16_nf4
+cgemm_4bit_inference_naive_fp16 = bnb.functional.lib.cgemm_4bit_inference_naive_fp16
+cgemm_4bit_inference_naive_bf16 = bnb.functional.lib.cgemm_4bit_inference_naive_bf16
-
-    def cgemm_4bit_inference_naive_fp16(*args, **kwargs):
-        raise RuntimeError("XPU BNB support is not implemented yet. cgemm_4bit_inference_naive_fp16 should not be called now.")
-
-    def cgemm_4bit_inference_naive_bf16(*args, **kwargs):
-        raise RuntimeError("XPU BNB support is not implemented yet. cgemm_4bit_inference_naive_bf16 should not be called now.")
-else:
-    # NVIDIA GPU Default Logic
-    cdequantize_blockwise_fp32 = bnb.functional.lib.cdequantize_blockwise_fp32
-    cdequantize_blockwise_fp16_nf4 = bnb.functional.lib.cdequantize_blockwise_fp16_nf4
-    cdequantize_blockwise_bf16_nf4 = bnb.functional.lib.cdequantize_blockwise_bf16_nf4
-    cgemm_4bit_inference_naive_fp16 = bnb.functional.lib.cgemm_4bit_inference_naive_fp16
-    cgemm_4bit_inference_naive_bf16 = bnb.functional.lib.cgemm_4bit_inference_naive_bf16
 pass
 
 torch_device_stream = torch.xpu.current_stream if DEVICE_TYPE == "xpu" else torch.cuda.current_stream
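The XPU stubs deleted above existed only so these names were defined when bitsandbytes could not be imported on Intel GPUs; with the import now unconditional, the shared-library symbols are bound once at module scope for every backend. A hedged sketch of the binding pattern follows; the attribute paths mirror the diff, but the tensor is illustrative (it assumes a CUDA device) and the kernels' full argument lists are version-dependent, so no kernel call is shown:

import torch
import bitsandbytes as bnb

# Bind the C entry points once; call sites then pay a single local lookup
# instead of a module-attribute chain on every invocation.
get_ptr = bnb.functional.get_ptr
cdequantize_blockwise_fp16_nf4 = bnb.functional.lib.cdequantize_blockwise_fp16_nf4

# get_ptr wraps tensor.data_ptr() in a ctypes pointer for the C kernels,
# and returns None when given a None tensor.
t = torch.empty(8, dtype=torch.uint8, device="cuda")
ptr = get_ptr(t)
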
4 changes: 2 additions & 2 deletions unsloth/models/_utils.py
@@ -517,8 +517,8 @@ def _is_openai_available(): return False

 # =============================================
 # Get Flash Attention v2 if Ampere (RTX 30xx, A100)
-if DEVICE_TYPE in ("cuda", "hip"):
-    import bitsandbytes as bnb
+import bitsandbytes as bnb
 
 
 from transformers import AutoTokenizer
 from transformers.utils.import_utils import _is_package_available
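One contextual note on the unchanged imports above: transformers' _is_package_available is the standard probe for optional dependencies, which is how code near this hunk can gate features such as Flash Attention. A small sketch; probing "flash_attn" here is an illustrative assumption, not part of this diff:

from transformers.utils.import_utils import _is_package_available

# True only if the package is installed and importable in this environment.
if _is_package_available("flash_attn"):
    print("Flash Attention v2 is available")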