From fa786c5d17fb7f758eb790d270d703e50820ed2e Mon Sep 17 00:00:00 2001 From: leizhenyuan Date: Fri, 21 Nov 2025 08:51:08 +0000 Subject: [PATCH 1/4] skip xpu fbgemm fp8 --- unsloth/models/_utils.py | 24 +++++++++++++----------- 1 file changed, 13 insertions(+), 11 deletions(-) diff --git a/unsloth/models/_utils.py b/unsloth/models/_utils.py index d63986db49..1405871379 100644 --- a/unsloth/models/_utils.py +++ b/unsloth/models/_utils.py @@ -2295,14 +2295,16 @@ def verify_fp8_support_if_applicable(model_config): raise ValueError( f"Unsloth: FP8 quantization is only supported on CUDA GPUs. You are using {DEVICE_TYPE}." ) - major_version, minor_version = torch.cuda.get_device_capability() - if quant_method == "fbgemm_fp8" and major_version < 9: - # While L4 does support FP8 as data type, it doesn't have fbgemm (package) support yet. So we restrict it. - raise ValueError( - f"Unsloth: FBGEMM FP8 quantization is only supported on H100 and higher GPUs. L4 is not supported. You are using {torch.cuda.get_device_name()}. Refer to https://developer.nvidia.com/cuda-gpus for more details." - ) - if quant_method == "fp8" and major_version * 10 + minor_version < 89: - # In case of block quantized, we allow L4 because we fall back to torchao kernels. - raise ValueError( - f"Unsloth: FP8 quantization is only supported on L4 and higher GPUs with compute capability 8.9 or higher. You are using {torch.cuda.get_device_name()}. Refer to https://developer.nvidia.com/cuda-gpus for more details." - ) + # todo: verify xpu fbgemm fp8 support status and change code here + if DEVICE_TYPE == "xpu": + major_version, minor_version = torch.cuda.get_device_capability() + if quant_method == "fbgemm_fp8" and major_version < 9: + # While L4 does support FP8 as data type, it doesn't have fbgemm (package) support yet. So we restrict it. + raise ValueError( + f"Unsloth: FBGEMM FP8 quantization is only supported on H100 and higher GPUs. L4 is not supported. You are using {torch.cuda.get_device_name()}. Refer to https://developer.nvidia.com/cuda-gpus for more details." + ) + if quant_method == "fp8" and major_version * 10 + minor_version < 89: + # In case of block quantized, we allow L4 because we fall back to torchao kernels. + raise ValueError( + f"Unsloth: FP8 quantization is only supported on L4 and higher GPUs with compute capability 8.9 or higher. You are using {torch.cuda.get_device_name()}. Refer to https://developer.nvidia.com/cuda-gpus for more details." + ) From da71450f3b0755d99f8ba12af4451cd301bc5251 Mon Sep 17 00:00:00 2001 From: Lei Zhenyuan Date: Thu, 27 Nov 2025 09:47:26 +0800 Subject: [PATCH 2/4] Apply suggestion from @gemini-code-assist[bot] Co-authored-by: gemini-code-assist[bot] <176961590+gemini-code-assist[bot]@users.noreply.github.com> --- unsloth/models/_utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/unsloth/models/_utils.py b/unsloth/models/_utils.py index 1405871379..4cc5076827 100644 --- a/unsloth/models/_utils.py +++ b/unsloth/models/_utils.py @@ -2296,7 +2296,7 @@ def verify_fp8_support_if_applicable(model_config): f"Unsloth: FP8 quantization is only supported on CUDA GPUs. You are using {DEVICE_TYPE}." ) # todo: verify xpu fbgemm fp8 support status and change code here - if DEVICE_TYPE == "xpu": + if DEVICE_TYPE == "cuda": major_version, minor_version = torch.cuda.get_device_capability() if quant_method == "fbgemm_fp8" and major_version < 9: # While L4 does support FP8 as data type, it doesn't have fbgemm (package) support yet. So we restrict it. From e2d7cbdaab716b72fefba425463875cd86e9f1ef Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Thu, 11 Dec 2025 00:55:49 +0000 Subject: [PATCH 3/4] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- unsloth/models/_utils.py | 1 - 1 file changed, 1 deletion(-) diff --git a/unsloth/models/_utils.py b/unsloth/models/_utils.py index 187806e00a..3c455092bc 100644 --- a/unsloth/models/_utils.py +++ b/unsloth/models/_utils.py @@ -2344,4 +2344,3 @@ def _get_inference_mode_context_manager(model: torch.nn.Module): return torch.no_grad() else: return torch.inference_mode() - From 9b71c192c53de38c6e83e96ecd098b42a9a92b9a Mon Sep 17 00:00:00 2001 From: Daniel Han Date: Wed, 10 Dec 2025 21:08:40 -0800 Subject: [PATCH 4/4] Apply suggestion from @danielhanchen --- unsloth/models/_utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/unsloth/models/_utils.py b/unsloth/models/_utils.py index 3c455092bc..bdb8f38a50 100644 --- a/unsloth/models/_utils.py +++ b/unsloth/models/_utils.py @@ -2312,7 +2312,7 @@ def verify_fp8_support_if_applicable(model_config): f"Unsloth: FP8 quantization is only supported on CUDA GPUs. You are using {DEVICE_TYPE}." ) - # todo: need to add fp8 support for intel xpu device + # [TODO] Need to add FP8 support for Intel XPUs if DEVICE_TYPE == "cuda": major_version, minor_version = torch.cuda.get_device_capability() if quant_method == "fbgemm_fp8" and major_version < 9: