From fa786c5d17fb7f758eb790d270d703e50820ed2e Mon Sep 17 00:00:00 2001
From: leizhenyuan <zhenyuan.lei@intel.com>
Date: Fri, 21 Nov 2025 08:51:08 +0000
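Subject: [PATCH 1/4] Skip CUDA-only FBGEMM/FP8 capability checks on XPU

The FBGEMM FP8 and FP8 compute-capability checks call
torch.cuda.get_device_capability(), so they are moved under a
device-type guard. Note: the guard added in this patch tests for "xpu";
PATCH 2/4 corrects it to "cuda".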

---
 unsloth/models/_utils.py | 24 +++++++++++++-----------
 1 file changed, 13 insertions(+), 11 deletions(-)

diff --git a/unsloth/models/_utils.py b/unsloth/models/_utils.py
index d63986db49..1405871379 100644
--- a/unsloth/models/_utils.py
+++ b/unsloth/models/_utils.py
@@ -2295,14 +2295,16 @@ def verify_fp8_support_if_applicable(model_config):
         raise ValueError(
             f"Unsloth: FP8 quantization is only supported on CUDA GPUs. You are using {DEVICE_TYPE}."
         )
-    major_version, minor_version = torch.cuda.get_device_capability()
-    if quant_method == "fbgemm_fp8" and major_version < 9:
-        # While L4 does support FP8 as data type, it doesn't have fbgemm (package) support yet. So we restrict it.
-        raise ValueError(
-            f"Unsloth: FBGEMM FP8 quantization is only supported on H100 and higher GPUs. L4 is not supported. You are using {torch.cuda.get_device_name()}. Refer to https://developer.nvidia.com/cuda-gpus for more details."
-        )
-    if quant_method == "fp8" and major_version * 10 + minor_version < 89:
-        # In case of block quantized, we allow L4 because we fall back to torchao kernels.
-        raise ValueError(
-            f"Unsloth: FP8 quantization is only supported on L4 and higher GPUs with compute capability 8.9 or higher. You are using {torch.cuda.get_device_name()}. Refer to https://developer.nvidia.com/cuda-gpus for more details."
-        )
+    # todo: verify xpu fbgemm fp8 support status and change code here
+    if DEVICE_TYPE == "xpu":
+        major_version, minor_version = torch.cuda.get_device_capability()
+        if quant_method == "fbgemm_fp8" and major_version < 9:
+            # While L4 does support FP8 as data type, it doesn't have fbgemm (package) support yet. So we restrict it.
+            raise ValueError(
+                f"Unsloth: FBGEMM FP8 quantization is only supported on H100 and higher GPUs. L4 is not supported. You are using {torch.cuda.get_device_name()}. Refer to https://developer.nvidia.com/cuda-gpus for more details."
+            )
+        if quant_method == "fp8" and major_version * 10 + minor_version < 89:
+            # In case of block quantized, we allow L4 because we fall back to torchao kernels.
+            raise ValueError(
+                f"Unsloth: FP8 quantization is only supported on L4 and higher GPUs with compute capability 8.9 or higher. You are using {torch.cuda.get_device_name()}. Refer to https://developer.nvidia.com/cuda-gpus for more details."
+            )

From da71450f3b0755d99f8ba12af4451cd301bc5251 Mon Sep 17 00:00:00 2001
From: Lei Zhenyuan <zhenyuan.lei@intel.com>
Date: Thu, 27 Nov 2025 09:47:26 +0800
Subject: [PATCH 2/4] Apply suggestion from @gemini-code-assist[bot]

Co-authored-by: gemini-code-assist[bot] <176961590+gemini-code-assist[bot]@users.noreply.github.com>
---
 unsloth/models/_utils.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/unsloth/models/_utils.py b/unsloth/models/_utils.py
index 1405871379..4cc5076827 100644
--- a/unsloth/models/_utils.py
+++ b/unsloth/models/_utils.py
@@ -2296,7 +2296,7 @@ def verify_fp8_support_if_applicable(model_config):
             f"Unsloth: FP8 quantization is only supported on CUDA GPUs. You are using {DEVICE_TYPE}."
         )
     # todo: verify xpu fbgemm fp8 support status and change code here
-    if DEVICE_TYPE == "xpu":
+    if DEVICE_TYPE == "cuda":
         major_version, minor_version = torch.cuda.get_device_capability()
         if quant_method == "fbgemm_fp8" and major_version < 9:
             # While L4 does support FP8 as data type, it doesn't have fbgemm (package) support yet. So we restrict it.

From e2d7cbdaab716b72fefba425463875cd86e9f1ef Mon Sep 17 00:00:00 2001
From: "pre-commit-ci[bot]"
 <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Date: Thu, 11 Dec 2025 00:55:49 +0000
Subject: [PATCH 3/4] [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci
---
 unsloth/models/_utils.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/unsloth/models/_utils.py b/unsloth/models/_utils.py
index 187806e00a..3c455092bc 100644
--- a/unsloth/models/_utils.py
+++ b/unsloth/models/_utils.py
@@ -2344,4 +2344,3 @@ def _get_inference_mode_context_manager(model: torch.nn.Module):
         return torch.no_grad()
     else:
         return torch.inference_mode()
-

From 9b71c192c53de38c6e83e96ecd098b42a9a92b9a Mon Sep 17 00:00:00 2001
From: Daniel Han <danielhanchen@gmail.com>
Date: Wed, 10 Dec 2025 21:08:40 -0800
Subject: [PATCH 4/4] Apply suggestion from @danielhanchen

---
 unsloth/models/_utils.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/unsloth/models/_utils.py b/unsloth/models/_utils.py
index 3c455092bc..bdb8f38a50 100644
--- a/unsloth/models/_utils.py
+++ b/unsloth/models/_utils.py
@@ -2312,7 +2312,7 @@ def verify_fp8_support_if_applicable(model_config):
             f"Unsloth: FP8 quantization is only supported on CUDA GPUs. You are using {DEVICE_TYPE}."
         )
 
-    # todo: need to add fp8 support for intel xpu device
+    # [TODO] Need to add FP8 support for Intel XPUs
     if DEVICE_TYPE == "cuda":
         major_version, minor_version = torch.cuda.get_device_capability()
         if quant_method == "fbgemm_fp8" and major_version < 9: