unslothai · danielhanchen · May 15, 2026 · May 15, 2026 · chatgpt-codex-connector · May 15, 2026
diff --git a/unsloth/_gpu_init.py b/unsloth/_gpu_init.py
@@ -209,7 +209,7 @@
 del patch_peft_weight_converter_compatibility
 
 # Torch 2.4 has including_emulation
-if DEVICE_TYPE == "cuda":
+if DEVICE_TYPE == "cuda" and torch.cuda.is_available():
     major_version, minor_version = torch.cuda.get_device_capability()
     SUPPORTS_BFLOAT16 = major_version >= 8
 
@@ -233,12 +233,18 @@ def is_bf16_supported():
     # torch.xpu.is_bf16_supported() does not have including_emulation
     # set SUPPORTS_BFLOAT16 as torch.xpu.is_bf16_supported()
     SUPPORTS_BFLOAT16 = torch.xpu.is_bf16_supported()
+else:
+    # CPU-only CI under UNSLOTH_ALLOW_CPU=1. We can't probe device
+    # capability, so assume no bf16. Training won't run on this host
+    # anyway; this branch only exists to let `import unsloth.trainer`
+    # succeed for source-inspection tests.
+    SUPPORTS_BFLOAT16 = False
 
 # For Gradio HF Spaces?
 # if "SPACE_AUTHOR_NAME" not in os.environ and "SPACE_REPO_NAME" not in os.environ:
 import triton
 
-if DEVICE_TYPE == "cuda":
+if DEVICE_TYPE == "cuda" and torch.cuda.is_available():
     libcuda_dirs = lambda: None
     if Version(triton.__version__) >= Version("3.0.0"):
         try:
@@ -349,5 +355,10 @@ def is_bf16_supported():
     launch_openenv,
 )
 
-# Patch TRL trainers for backwards compatibility
-_patch_trl_trainer()
+# Patch TRL trainers for backwards compatibility.
+# Skipped under UNSLOTH_ALLOW_CPU=1 (CPU-only CI) because rebinding
+# trl.SFTTrainer.__init__ to a generic wrapper changes
+# inspect.getsource(SFTTrainer.__init__) and corrupts downstream
+# drift detectors that anchor on the pristine upstream source.
+if os.environ.get("UNSLOTH_ALLOW_CPU", "0") != "1":
+    _patch_trl_trainer()
@@ -63,6 +63,10 @@ def get_device_type():
     # Check torch.accelerator
     if hasattr(torch, "accelerator"):
         if not torch.accelerator.is_available():
+            # Test-only CPU fallback: report "cuda" although no GPU exists. The env
+            # var is read once per process; get_device_type is @functools.cache'd.
+            if os.environ.get("UNSLOTH_ALLOW_CPU", "0") == "1":
+                return "cuda"
             raise NotImplementedError(
                 "Unsloth cannot find any torch accelerator? You need a GPU."
             )
@@ -73,6 +77,8 @@ def get_device_type():
                 f"But `torch.accelerator.current_accelerator()` works with it being = `{accelerator}`\n"
                 f"Please reinstall torch - it's most likely broken :("
             )
+    if os.environ.get("UNSLOTH_ALLOW_CPU", "0") == "1":
+        return "cuda"
     raise NotImplementedError(
         "Unsloth currently only works on NVIDIA, AMD and Intel GPUs."
     )

@@ -1193,7 +1193,7 @@ def _is_openai_available():
 HAS_FLASH_ATTENTION = False
 HAS_FLASH_ATTENTION_SOFTCAPPING = False
 
-if DEVICE_TYPE == "cuda":
+if DEVICE_TYPE == "cuda" and torch.cuda.is_available():
     major_version, minor_version = torch.cuda.get_device_capability()
     torch.cuda.get_device_capability = functools.cache(torch.cuda.get_device_capability)
 

@@ -2270,6 +2270,11 @@ def patch_trl_vllm_generation():
 def PatchFastRL(algorithm = None, FastLanguageModel = None):
     if FastLanguageModel is not None:
         PatchRL(FastLanguageModel)
+    # Under UNSLOTH_ALLOW_CPU=1 (CPU-only CI), skip TRL trainer rewriting so
+    # downstream `inspect.getsource(trl.SFTTrainer)` drift detectors see the
+    # pristine upstream class, not the compiled Unsloth* wrappers.
+    if os.environ.get("UNSLOTH_ALLOW_CPU", "0") == "1":
+        return
     # Install the disable_gradient_checkpointing noop BEFORE
     # patch_trl_rl_trainers. patch_trl_rl_trainers imports extra trl.* trainer
     # submodules while generating the compiled cache; any new trl.* modules