From ad9d622124918dacc548e3c0b46de040a79bda2e Mon Sep 17 00:00:00 2001
From: Daniel Han <daniel@unsloth.ai>
Date: Wed, 18 Mar 2026 04:33:56 +0000
Subject: [PATCH 1/9] Allow Windows setup to complete without NVIDIA GPU

setup.ps1 previously hard-exited if nvidia-smi was not found, blocking
setup entirely on CPU-only or non-NVIDIA machines. The backend already
supports CPU and MLX (Apple Silicon) in chat-only GGUF mode, and the
Linux/Mac setup.sh handles missing GPUs gracefully.

Changes:
- Convert the GPU check from a hard exit to a warning
- Guard CUDA toolkit installation behind $HasNvidiaSmi
- Install CPU-only PyTorch when no GPU is detected
- Build llama.cpp without CUDA flags when no GPU is present
- Update doc comment to reflect CPU support
---
 studio/setup.ps1 | 48 ++++++++++++++++++++++++++++++++----------------
 1 file changed, 32 insertions(+), 16 deletions(-)

diff --git a/studio/setup.ps1 b/studio/setup.ps1
index 2420448deb..4592f6f0af 100644
--- a/studio/setup.ps1
+++ b/studio/setup.ps1
@@ -8,7 +8,7 @@
     Always installs Node.js if needed. When running from pip install:
     skips frontend build (already bundled). When running from git repo:
     full setup including frontend build.
-    Requires an NVIDIA GPU -- CPU-only machines are not supported.
+    Supports NVIDIA GPU (full training + inference) and CPU-only (GGUF chat mode).
 .NOTES
     Usage: powershell -ExecutionPolicy Bypass -File setup.ps1
 #>
@@ -260,14 +260,13 @@ try {
 } catch {}
 if (-not $HasNvidiaSmi) {
     Write-Host ""
-    Write-Host "[ERROR] Unsloth Studio requires an NVIDIA GPU." -ForegroundColor Red
-    Write-Host "        CPU-only machines are not supported." -ForegroundColor Red
+    Write-Host "[WARN] No NVIDIA GPU detected. Studio will run in chat-only (GGUF) mode." -ForegroundColor Yellow
+    Write-Host "       Training and GPU inference require an NVIDIA GPU with drivers installed." -ForegroundColor Yellow
+    Write-Host "       https://www.nvidia.com/Download/index.aspx" -ForegroundColor Yellow
     Write-Host ""
-    Write-Host "        If you have an NVIDIA GPU, ensure the driver is installed:" -ForegroundColor Yellow
-    Write-Host "        https://www.nvidia.com/Download/index.aspx" -ForegroundColor Yellow
-    exit 1
+} else {
+    Write-Host "[OK] NVIDIA GPU detected" -ForegroundColor Green
 }
-Write-Host "[OK] NVIDIA GPU detected" -ForegroundColor Green
 
 # ============================================
 # 1a.5. Windows Long Paths (required for deep node_modules / Python paths)
@@ -389,6 +388,7 @@ if ($vsResult) {
 # ============================================
 # 1e. CUDA Toolkit (nvcc for llama.cpp build + env vars)
 # ============================================
+if ($HasNvidiaSmi) {
 # IMPORTANT: The CUDA Toolkit version must be <= the max CUDA version the
 # NVIDIA driver supports.  nvidia-smi reports this as "CUDA Version: X.Y".
 # If we install a toolkit newer than the driver supports, llama-server will
@@ -643,6 +643,9 @@ Write-Host "   CudaToolkitDir = $CudaToolkitRoot\" -ForegroundColor Gray
 if (-not $CudaArch) {
     Write-Host "   [WARN] Could not detect compute capability -- cmake will use defaults" -ForegroundColor Yellow
 }
+} else {
+    Write-Host "[SKIP] CUDA Toolkit -- no NVIDIA GPU detected" -ForegroundColor Yellow
+}
 
 # ============================================
 # 1f. Node.js / npm (skip if pip-installed -- only needed for frontend build)
@@ -880,14 +883,21 @@ $env:TORCHINDUCTOR_CACHE_DIR = $TorchCacheDir
 [Environment]::SetEnvironmentVariable('TORCHINDUCTOR_CACHE_DIR', $TorchCacheDir, 'User')
 Write-Host "[OK] TORCHINDUCTOR_CACHE_DIR set to $TorchCacheDir (avoids MAX_PATH issues)" -ForegroundColor Green
 
-$CuTag = Get-PytorchCudaTag
-Write-Host "   Installing PyTorch with CUDA support ($CuTag)..." -ForegroundColor Cyan
-pip install torch torchvision torchaudio --index-url "https://download.pytorch.org/whl/$CuTag" 2>&1 | Out-Null
+if ($HasNvidiaSmi) {
+    $CuTag = Get-PytorchCudaTag
+    Write-Host "   Installing PyTorch with CUDA support ($CuTag)..." -ForegroundColor Cyan
+    pip install torch torchvision torchaudio --index-url "https://download.pytorch.org/whl/$CuTag" 2>&1 | Out-Null
 
-# Install Triton for Windows (enables torch.compile — without it training can hang)
-Write-Host "   Installing Triton for Windows..." -ForegroundColor Cyan
-pip install "triton-windows<3.7" 2>&1 | Out-Null
-Write-Host "[OK] Triton for Windows installed (enables torch.compile)" -ForegroundColor Green
+    # Install Triton for Windows (enables torch.compile -- without it training can hang)
+    Write-Host "   Installing Triton for Windows..." -ForegroundColor Cyan
+    pip install "triton-windows<3.7" 2>&1 | Out-Null
+} else {
+    Write-Host "   Installing PyTorch (CPU-only)..." -ForegroundColor Cyan
+    pip install torch torchvision torchaudio 2>&1 | Out-Null
+}
+if ($HasNvidiaSmi) {
+    Write-Host "[OK] Triton for Windows installed (enables torch.compile)" -ForegroundColor Green
+}
 
 # Ordered heavy dependency installation — shared cross-platform script
 Write-Host "   Running ordered dependency installation..." -ForegroundColor Cyan
@@ -987,7 +997,11 @@ if (Test-Path $LlamaServerBin) {
     Write-Host "[OK] llama-server already exists at $LlamaServerBin" -ForegroundColor Green
 } else {
     Write-Host ""
-    Write-Host "Building llama.cpp with CUDA support..." -ForegroundColor Cyan
+    if ($HasNvidiaSmi) {
+        Write-Host "Building llama.cpp with CUDA support..." -ForegroundColor Cyan
+    } else {
+        Write-Host "Building llama.cpp (CPU-only, no NVIDIA GPU detected)..." -ForegroundColor Cyan
+    }
     Write-Host "   This typically takes 5-10 minutes on first build." -ForegroundColor Gray
     Write-Host ""
 
@@ -1066,7 +1080,8 @@ if (Test-Path $LlamaServerBin) {
             $CmakeArgs += '-DLLAMA_CURL=OFF'
         }
         $CmakeArgs += '-DCMAKE_EXE_LINKER_FLAGS=/NODEFAULTLIB:LIBCMT'
-        # CUDA flags (Unsloth-aligned)
+        # CUDA flags (Unsloth-aligned) -- only if GPU available
+        if ($HasNvidiaSmi -and $NvccPath) {
         $CmakeArgs += '-DGGML_CUDA=ON'
         $CmakeArgs += "-DCUDAToolkit_ROOT=$CudaToolkitRoot"
         $CmakeArgs += "-DCUDA_TOOLKIT_ROOT_DIR=$CudaToolkitRoot"
@@ -1092,6 +1107,7 @@ if (Test-Path $LlamaServerBin) {
                 # else: omit flag entirely, let cmake pick defaults
             }
         }
+        }
 
         cmake @CmakeArgs 2>&1 | Out-Null
         if ($LASTEXITCODE -ne 0) {

From a7d4e8860f53b3053e8f742c471073439651ca09 Mon Sep 17 00:00:00 2001
From: Daniel Han <23090290+danielhanchen@users.noreply.github.com>
Date: Wed, 18 Mar 2026 05:33:10 +0000
Subject: [PATCH 2/9] Show pip progress for PyTorch download on Windows

The torch CUDA wheel is ~2.8 GB and the CPU wheel is ~300 MB. With
| Out-Null suppressing all output, the install appeared completely
frozen with no feedback. Remove | Out-Null for the torch install
lines so pip's download progress bar is visible. Add a size hint
so users know the download is expected to take a while.

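Also moves the Triton success message into the GPU branch, directly
after the Triton install command, replacing the separate trailing
`if ($HasNvidiaSmi)` block. The message is still printed without
checking pip's exit code.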
---
 studio/setup.ps1 | 9 ++++-----
 1 file changed, 4 insertions(+), 5 deletions(-)

diff --git a/studio/setup.ps1 b/studio/setup.ps1
index 4592f6f0af..c0dde94100 100644
--- a/studio/setup.ps1
+++ b/studio/setup.ps1
@@ -886,17 +886,16 @@ Write-Host "[OK] TORCHINDUCTOR_CACHE_DIR set to $TorchCacheDir (avoids MAX_PATH
 if ($HasNvidiaSmi) {
     $CuTag = Get-PytorchCudaTag
     Write-Host "   Installing PyTorch with CUDA support ($CuTag)..." -ForegroundColor Cyan
-    pip install torch torchvision torchaudio --index-url "https://download.pytorch.org/whl/$CuTag" 2>&1 | Out-Null
+    Write-Host "   (This download is ~2.8 GB -- may take a few minutes)" -ForegroundColor Gray
+    pip install torch torchvision torchaudio --index-url "https://download.pytorch.org/whl/$CuTag"
 
     # Install Triton for Windows (enables torch.compile -- without it training can hang)
     Write-Host "   Installing Triton for Windows..." -ForegroundColor Cyan
     pip install "triton-windows<3.7" 2>&1 | Out-Null
+    Write-Host "[OK] Triton for Windows installed (enables torch.compile)" -ForegroundColor Green
 } else {
     Write-Host "   Installing PyTorch (CPU-only)..." -ForegroundColor Cyan
-    pip install torch torchvision torchaudio 2>&1 | Out-Null
-}
-if ($HasNvidiaSmi) {
-    Write-Host "[OK] Triton for Windows installed (enables torch.compile)" -ForegroundColor Green
+    pip install torch torchvision torchaudio
 }
 
 # Ordered heavy dependency installation — shared cross-platform script

From c53f5298caa7783df11cf9279afc66890664ae96 Mon Sep 17 00:00:00 2001
From: Daniel Han <23090290+danielhanchen@users.noreply.github.com>
Date: Wed, 18 Mar 2026 05:36:54 +0000
Subject: [PATCH 3/9] Guard CUDA env re-sanitization behind GPU check in
 llama.cpp build

The CUDA_PATH re-sanitization block (setup.ps1 lines 1020-1033)
references $CudaToolkitRoot, which is only set when $HasNvidiaSmi is
true and the CUDA Toolkit section runs. On CPU-only machines,
$CudaToolkitRoot is null, causing Split-Path to throw:

  Split-Path : Cannot bind argument to parameter 'Path' because it is null.

Wrap the entire block in `if ($HasNvidiaSmi -and $CudaToolkitRoot)`.
---
 studio/setup.ps1 | 22 ++++++++++++----------
 1 file changed, 12 insertions(+), 10 deletions(-)

diff --git a/studio/setup.ps1 b/studio/setup.ps1
index c0dde94100..8ad4009bac 100644
--- a/studio/setup.ps1
+++ b/studio/setup.ps1
@@ -1020,17 +1020,19 @@ if (Test-Path $LlamaServerBin) {
     # Re-sanitize CUDA_PATH_V* vars — Refresh-Environment (called during
     # Node/Python installs above) may have repopulated conflicting versioned
     # vars from the Machine registry.
-    $cudaPathVars2 = @([Environment]::GetEnvironmentVariables('Process').Keys | Where-Object { $_ -match '^CUDA_PATH_V' })
-    foreach ($v2 in $cudaPathVars2) {
-        [Environment]::SetEnvironmentVariable($v2, $null, 'Process')
-    }
-    $tkDirName2 = Split-Path $CudaToolkitRoot -Leaf
-    if ($tkDirName2 -match '^v(\d+)\.(\d+)') {
-        [Environment]::SetEnvironmentVariable("CUDA_PATH_V$($Matches[1])_$($Matches[2])", $CudaToolkitRoot, 'Process')
+    if ($HasNvidiaSmi -and $CudaToolkitRoot) {
+        $cudaPathVars2 = @([Environment]::GetEnvironmentVariables('Process').Keys | Where-Object { $_ -match '^CUDA_PATH_V' })
+        foreach ($v2 in $cudaPathVars2) {
+            [Environment]::SetEnvironmentVariable($v2, $null, 'Process')
+        }
+        $tkDirName2 = Split-Path $CudaToolkitRoot -Leaf
+        if ($tkDirName2 -match '^v(\d+)\.(\d+)') {
+            [Environment]::SetEnvironmentVariable("CUDA_PATH_V$($Matches[1])_$($Matches[2])", $CudaToolkitRoot, 'Process')
+        }
+        # Also re-assert CUDA_PATH and CudaToolkitDir in case they were overwritten
+        [Environment]::SetEnvironmentVariable('CUDA_PATH', $CudaToolkitRoot, 'Process')
+        [Environment]::SetEnvironmentVariable('CudaToolkitDir', "$CudaToolkitRoot\", 'Process')
     }
-    # Also re-assert CUDA_PATH and CudaToolkitDir in case they were overwritten
-    [Environment]::SetEnvironmentVariable('CUDA_PATH', $CudaToolkitRoot, 'Process')
-    [Environment]::SetEnvironmentVariable('CudaToolkitDir', "$CudaToolkitRoot\", 'Process')
 
     # -- Step A: Clone or pull llama.cpp --
 

From be61a3435caa0489c9af43cb462814edb585ccf2 Mon Sep 17 00:00:00 2001
From: Daniel Han <23090290+danielhanchen@users.noreply.github.com>
Date: Wed, 18 Mar 2026 05:50:35 +0000
Subject: [PATCH 4/9] Fix cmake not found on Windows after winget install

Two issues fixed:

1. After winget installs cmake, Refresh-Environment may not pick up the
   new PATH entry (MSI PATH changes sometimes need a new shell). Added a
   fallback that probes cmake's default install locations (Program Files,
   Program Files (x86), LocalAppData) and adds the directory to the
   process PATH explicitly if cmake.exe is found there.

2. If cmake is still unavailable when the llama.cpp build starts (e.g.
   winget failed silently or PATH was not updated), the build now skips
   gracefully with a [SKIP] warning instead of crashing with
   "cmake : The term 'cmake' is not recognized".
---
 studio/setup.ps1 | 26 ++++++++++++++++++++++++++
 1 file changed, 26 insertions(+)

diff --git a/studio/setup.ps1 b/studio/setup.ps1
index 8ad4009bac..33aeceef65 100644
--- a/studio/setup.ps1
+++ b/studio/setup.ps1
@@ -340,6 +340,25 @@ if (-not $HasCmake) {
             $HasCmake = $null -ne (Get-Command cmake -ErrorAction SilentlyContinue)
         } catch { }
     }
+    # winget may succeed but cmake isn't on PATH yet (MSI PATH changes need a
+    # new shell). Try the default install location as a fallback.
+    if (-not $HasCmake) {
+        $cmakeDefaults = @(
+            "$env:ProgramFiles\CMake\bin",
+            "${env:ProgramFiles(x86)}\CMake\bin",
+            "$env:LOCALAPPDATA\CMake\bin"
+        )
+        foreach ($d in $cmakeDefaults) {
+            if (Test-Path (Join-Path $d "cmake.exe")) {
+                $env:Path = "$d;$env:Path"
+                $HasCmake = $null -ne (Get-Command cmake -ErrorAction SilentlyContinue)
+                if ($HasCmake) {
+                    Write-Host "   Found cmake at $d (added to PATH)" -ForegroundColor Gray
+                    break
+                }
+            }
+        }
+    }
     if ($HasCmake) {
         Write-Host "[OK] CMake installed" -ForegroundColor Green
     } else {
@@ -991,9 +1010,16 @@ $LlamaCppDir = Join-Path $UnslothHome "llama.cpp"
 $BuildDir = Join-Path $LlamaCppDir "build"
 $LlamaServerBin = Join-Path $BuildDir "bin\Release\llama-server.exe"
 
+$HasCmakeForBuild = $null -ne (Get-Command cmake -ErrorAction SilentlyContinue)
+
 if (Test-Path $LlamaServerBin) {
     Write-Host ""
     Write-Host "[OK] llama-server already exists at $LlamaServerBin" -ForegroundColor Green
+} elseif (-not $HasCmakeForBuild) {
+    Write-Host ""
+    Write-Host "[SKIP] llama-server build -- cmake not available" -ForegroundColor Yellow
+    Write-Host "       GGUF inference and export will not be available." -ForegroundColor Yellow
+    Write-Host "       Install CMake from https://cmake.org/download/ and re-run setup." -ForegroundColor Yellow
 } else {
     Write-Host ""
     if ($HasNvidiaSmi) {

From c2f12229407b7c569b51aad3e960a7a13cc51203 Mon Sep 17 00:00:00 2001
From: Daniel Han <23090290+danielhanchen@users.noreply.github.com>
Date: Wed, 18 Mar 2026 06:08:53 +0000
Subject: [PATCH 5/9] Show cmake errors on failure and retry CUDA VS
 integration with elevation

Two fixes for issue #4405 (Windows setup fails at cmake configure):

1. cmake configure: capture output and display it on failure instead of
   piping to Out-Null. When the error mentions "No CUDA toolset found",
   print a hint about the CUDA VS integration files.

2. CUDA VS integration copy: when the direct Copy-Item fails (needs
   admin access to write to Program Files), retry with Start-Process
   -Verb RunAs to prompt for elevation. The failed copy is the root
   cause of the "No CUDA toolset found" cmake failure -- without it, the
   .targets files that let MSBuild compile .cu files are missing from
   the VS BuildCustomizations directory.
---
 studio/setup.ps1 | 34 ++++++++++++++++++++++++++++------
 1 file changed, 28 insertions(+), 6 deletions(-)

diff --git a/studio/setup.ps1 b/studio/setup.ps1
index 33aeceef65..bd8f225db7 100644
--- a/studio/setup.ps1
+++ b/studio/setup.ps1
@@ -643,11 +643,24 @@ if ($VsInstallPath -and $CudaToolkitRoot) {
                 Copy-Item "$cudaExtras\*" $vsCustomizations -Force -ErrorAction Stop
                 Write-Host "   [OK] CUDA VS integration files installed" -ForegroundColor Green
             } catch {
-                Write-Host "   [WARN] Could not copy CUDA VS integration files (may need admin)" -ForegroundColor Yellow
-                Write-Host "          Manual fix: copy contents of" -ForegroundColor Yellow
-                Write-Host "            $cudaExtras" -ForegroundColor Cyan
-                Write-Host "          into:" -ForegroundColor Yellow
-                Write-Host "            $vsCustomizations" -ForegroundColor Cyan
+                # Direct copy failed (needs admin). Try elevated copy via Start-Process.
+                try {
+                    $copyCmd = "Copy-Item '$cudaExtras\*' '$vsCustomizations' -Force"
+                    Start-Process powershell -ArgumentList "-NoProfile -Command $copyCmd" -Verb RunAs -Wait -ErrorAction Stop
+                    $hasTargetsRetry = Get-ChildItem $vsCustomizations -Filter "CUDA *.targets" -ErrorAction SilentlyContinue
+                    if ($hasTargetsRetry) {
+                        Write-Host "   [OK] CUDA VS integration files installed (elevated)" -ForegroundColor Green
+                    } else {
+                        throw "Copy did not produce .targets files"
+                    }
+                } catch {
+                    Write-Host "   [WARN] Could not copy CUDA VS integration files" -ForegroundColor Yellow
+                    Write-Host "          The llama.cpp build may fail with 'No CUDA toolset found'." -ForegroundColor Yellow
+                    Write-Host "          Manual fix: copy contents of" -ForegroundColor Yellow
+                    Write-Host "            $cudaExtras" -ForegroundColor Cyan
+                    Write-Host "          into:" -ForegroundColor Yellow
+                    Write-Host "            $vsCustomizations" -ForegroundColor Cyan
+                }
             }
         }
     }
@@ -1136,10 +1149,19 @@ if (Test-Path $LlamaServerBin) {
         }
         }
 
-        cmake @CmakeArgs 2>&1 | Out-Null
+        $cmakeOutput = cmake @CmakeArgs 2>&1 | Out-String
         if ($LASTEXITCODE -ne 0) {
             $BuildOk = $false
             $FailedStep = "cmake configure"
+            Write-Host $cmakeOutput -ForegroundColor Red
+            if ($cmakeOutput -match 'No CUDA toolset found|CUDA_TOOLKIT_ROOT_DIR|nvcc') {
+                Write-Host ""
+                Write-Host "   Hint: CUDA VS integration may be missing. Try running as admin:" -ForegroundColor Yellow
+                Write-Host "   Copy contents of:" -ForegroundColor Yellow
+                Write-Host "     <CUDA_PATH>\extras\visual_studio_integration\MSBuildExtensions" -ForegroundColor Yellow
+                Write-Host "   into:" -ForegroundColor Yellow
+                Write-Host "     <VS_PATH>\MSBuild\Microsoft\VC\v170\BuildCustomizations" -ForegroundColor Yellow
+            }
         }
     }
 

From 4f1e9df5ff8804516f4a88f18d81e361fc4b59f5 Mon Sep 17 00:00:00 2001
From: Daniel Han <23090290+danielhanchen@users.noreply.github.com>
Date: Wed, 18 Mar 2026 06:12:27 +0000
Subject: [PATCH 6/9] Address reviewer feedback: cmake PATH persistence, stale
 cache, torch error check

1. Persist cmake PATH to user registry so Refresh-Environment cannot
   drop it later in the same setup run. Previously the process-only
   PATH addition at phase 1 could vanish when Refresh-Environment
   rebuilt PATH from registry during phase 2/3 installs.

2. Clean stale CMake cache before configure. If a previous run built
   with CUDA and the user reruns without a GPU (or vice versa), the
   cached GGML_CUDA value would persist. Now the build dir is removed
   before configure.

3. Explicitly set -DGGML_CUDA=OFF for CPU-only builds instead of just
   omitting CUDA flags. This prevents cmake from auto-detecting a
   partial CUDA installation.

4. Fix CUDA cmake flag indentation -- the flags were left unindented
   when the first patch in this series wrapped them in an `if` block;
   they are now consistently indented inside the if/else block.

5. Fail hard if pip install torch returns a non-zero exit code instead
   of silently continuing with a broken environment.
---
 studio/setup.ps1 | 72 +++++++++++++++++++++++++++++++-----------------
 1 file changed, 47 insertions(+), 25 deletions(-)

diff --git a/studio/setup.ps1 b/studio/setup.ps1
index bd8f225db7..87bc43b87f 100644
--- a/studio/setup.ps1
+++ b/studio/setup.ps1
@@ -351,6 +351,11 @@ if (-not $HasCmake) {
         foreach ($d in $cmakeDefaults) {
             if (Test-Path (Join-Path $d "cmake.exe")) {
                 $env:Path = "$d;$env:Path"
+                # Persist to user PATH so Refresh-Environment does not drop it later
+                $userPath = [Environment]::GetEnvironmentVariable('Path', 'User')
+                if (-not $userPath -or $userPath -notlike "*$d*") {
+                    [Environment]::SetEnvironmentVariable('Path', "$d;$userPath", 'User')
+                }
                 $HasCmake = $null -ne (Get-Command cmake -ErrorAction SilentlyContinue)
                 if ($HasCmake) {
                     Write-Host "   Found cmake at $d (added to PATH)" -ForegroundColor Gray
@@ -920,6 +925,10 @@ if ($HasNvidiaSmi) {
     Write-Host "   Installing PyTorch with CUDA support ($CuTag)..." -ForegroundColor Cyan
     Write-Host "   (This download is ~2.8 GB -- may take a few minutes)" -ForegroundColor Gray
     pip install torch torchvision torchaudio --index-url "https://download.pytorch.org/whl/$CuTag"
+    if ($LASTEXITCODE -ne 0) {
+        Write-Host "[FAILED] PyTorch CUDA install failed (exit code $LASTEXITCODE)" -ForegroundColor Red
+        exit 1
+    }
 
     # Install Triton for Windows (enables torch.compile -- without it training can hang)
     Write-Host "   Installing Triton for Windows..." -ForegroundColor Cyan
@@ -928,6 +937,10 @@ if ($HasNvidiaSmi) {
 } else {
     Write-Host "   Installing PyTorch (CPU-only)..." -ForegroundColor Cyan
     pip install torch torchvision torchaudio
+    if ($LASTEXITCODE -ne 0) {
+        Write-Host "[FAILED] PyTorch install failed (exit code $LASTEXITCODE)" -ForegroundColor Red
+        exit 1
+    }
 }
 
 # Ordered heavy dependency installation — shared cross-platform script
@@ -1091,7 +1104,14 @@ if (Test-Path $LlamaServerBin) {
         }
     }
 
-    # -- Step B: cmake configure (CUDA + Unsloth flags) --
+    # -- Step B: cmake configure --
+    # Clean stale CMake cache to prevent previous CUDA settings from leaking
+    # into a CPU-only rebuild (or vice versa).
+    $CmakeCacheFile = Join-Path $BuildDir "CMakeCache.txt"
+    if (Test-Path $CmakeCacheFile) {
+        Remove-Item -Recurse -Force $BuildDir
+    }
+
     if ($BuildOk) {
         Write-Host ""
         Write-Host "--- cmake configure ---" -ForegroundColor Cyan
@@ -1120,33 +1140,35 @@ if (Test-Path $LlamaServerBin) {
             $CmakeArgs += '-DLLAMA_CURL=OFF'
         }
         $CmakeArgs += '-DCMAKE_EXE_LINKER_FLAGS=/NODEFAULTLIB:LIBCMT'
-        # CUDA flags (Unsloth-aligned) -- only if GPU available
+        # CUDA flags -- only if GPU available, otherwise explicitly disable
         if ($HasNvidiaSmi -and $NvccPath) {
-        $CmakeArgs += '-DGGML_CUDA=ON'
-        $CmakeArgs += "-DCUDAToolkit_ROOT=$CudaToolkitRoot"
-        $CmakeArgs += "-DCUDA_TOOLKIT_ROOT_DIR=$CudaToolkitRoot"
-        $CmakeArgs += "-DCMAKE_CUDA_COMPILER=$NvccPath"
-        $CmakeArgs += '-DGGML_CUDA_FA_ALL_QUANTS=ON'
-        $CmakeArgs += '-DGGML_CUDA_F16=OFF'
-        $CmakeArgs += '-DGGML_CUDA_GRAPHS=OFF'
-        $CmakeArgs += '-DGGML_CUDA_FORCE_CUBLAS=OFF'
-        $CmakeArgs += '-DGGML_CUDA_PEER_MAX_BATCH_SIZE=8192'
-        if ($CudaArch) {
-            # Validate nvcc actually supports this architecture
-            if (Test-NvccArchSupport -NvccExe $NvccPath -Arch $CudaArch) {
-                $CmakeArgs += "-DCMAKE_CUDA_ARCHITECTURES=$CudaArch"
-            } else {
-                # GPU arch too new for this toolkit — fall back to highest supported.
-                # PTX forward-compatibility will JIT-compile for the actual GPU at runtime.
-                $maxArch = Get-NvccMaxArch -NvccExe $NvccPath
-                if ($maxArch) {
-                    $CmakeArgs += "-DCMAKE_CUDA_ARCHITECTURES=$maxArch"
-                    Write-Host "   [WARN] GPU is sm_$CudaArch but nvcc only supports up to sm_$maxArch" -ForegroundColor Yellow
-                    Write-Host "          Building with sm_$maxArch (PTX will JIT for your GPU at runtime)" -ForegroundColor Yellow
+            $CmakeArgs += '-DGGML_CUDA=ON'
+            $CmakeArgs += "-DCUDAToolkit_ROOT=$CudaToolkitRoot"
+            $CmakeArgs += "-DCUDA_TOOLKIT_ROOT_DIR=$CudaToolkitRoot"
+            $CmakeArgs += "-DCMAKE_CUDA_COMPILER=$NvccPath"
+            $CmakeArgs += '-DGGML_CUDA_FA_ALL_QUANTS=ON'
+            $CmakeArgs += '-DGGML_CUDA_F16=OFF'
+            $CmakeArgs += '-DGGML_CUDA_GRAPHS=OFF'
+            $CmakeArgs += '-DGGML_CUDA_FORCE_CUBLAS=OFF'
+            $CmakeArgs += '-DGGML_CUDA_PEER_MAX_BATCH_SIZE=8192'
+            if ($CudaArch) {
+                # Validate nvcc actually supports this architecture
+                if (Test-NvccArchSupport -NvccExe $NvccPath -Arch $CudaArch) {
+                    $CmakeArgs += "-DCMAKE_CUDA_ARCHITECTURES=$CudaArch"
+                } else {
+                    # GPU arch too new for this toolkit -- fall back to highest supported.
+                    # PTX forward-compatibility will JIT-compile for the actual GPU at runtime.
+                    $maxArch = Get-NvccMaxArch -NvccExe $NvccPath
+                    if ($maxArch) {
+                        $CmakeArgs += "-DCMAKE_CUDA_ARCHITECTURES=$maxArch"
+                        Write-Host "   [WARN] GPU is sm_$CudaArch but nvcc only supports up to sm_$maxArch" -ForegroundColor Yellow
+                        Write-Host "          Building with sm_$maxArch (PTX will JIT for your GPU at runtime)" -ForegroundColor Yellow
+                    }
+                    # else: omit flag entirely, let cmake pick defaults
                 }
-                # else: omit flag entirely, let cmake pick defaults
             }
-        }
+        } else {
+            $CmakeArgs += '-DGGML_CUDA=OFF'
         }
 
         $cmakeOutput = cmake @CmakeArgs 2>&1 | Out-String

From 7125772c0adff14176e201d0da1d5f4bca3c03db Mon Sep 17 00:00:00 2001
From: Daniel Han <23090290+danielhanchen@users.noreply.github.com>
Date: Wed, 18 Mar 2026 06:16:37 +0000
Subject: [PATCH 7/9] Remove extra CUDA cmake flags to align Windows with Linux
 build

Drop GGML_CUDA_FA_ALL_QUANTS, GGML_CUDA_F16, GGML_CUDA_GRAPHS,
GGML_CUDA_FORCE_CUBLAS, and GGML_CUDA_PEER_MAX_BATCH_SIZE flags.
The Linux build in setup.sh only sets GGML_CUDA=ON and lets llama.cpp
use its defaults for everything else. Keep Windows consistent.
---
 studio/setup.ps1 | 5 -----
 1 file changed, 5 deletions(-)

diff --git a/studio/setup.ps1 b/studio/setup.ps1
index 87bc43b87f..667460ebd3 100644
--- a/studio/setup.ps1
+++ b/studio/setup.ps1
@@ -1146,11 +1146,6 @@ if (Test-Path $LlamaServerBin) {
             $CmakeArgs += "-DCUDAToolkit_ROOT=$CudaToolkitRoot"
             $CmakeArgs += "-DCUDA_TOOLKIT_ROOT_DIR=$CudaToolkitRoot"
             $CmakeArgs += "-DCMAKE_CUDA_COMPILER=$NvccPath"
-            $CmakeArgs += '-DGGML_CUDA_FA_ALL_QUANTS=ON'
-            $CmakeArgs += '-DGGML_CUDA_F16=OFF'
-            $CmakeArgs += '-DGGML_CUDA_GRAPHS=OFF'
-            $CmakeArgs += '-DGGML_CUDA_FORCE_CUBLAS=OFF'
-            $CmakeArgs += '-DGGML_CUDA_PEER_MAX_BATCH_SIZE=8192'
             if ($CudaArch) {
                 # Validate nvcc actually supports this architecture
                 if (Test-NvccArchSupport -NvccExe $NvccPath -Arch $CudaArch) {

From 8035e9caf90e27d73f280bf78f2f410b8efd2252 Mon Sep 17 00:00:00 2001
From: Daniel Han <23090290+danielhanchen@users.noreply.github.com>
Date: Wed, 18 Mar 2026 06:28:33 +0000
Subject: [PATCH 8/9] Address reviewer round 2: GPU probe fallback, Triton
 check, stale binary rebuild

1. GPU detection: fall back to the default nvidia-smi install locations
   (Program Files\NVIDIA Corporation\NVSMI, System32) when nvidia-smi
   is not on PATH. Prevents silent CPU-only provisioning on machines
   that have a GPU but a broken PATH.

2. Triton: check $LASTEXITCODE after pip install and print [WARN]
   on failure instead of unconditional [OK].

3. Stale llama-server: check CMakeCache.txt for GGML_CUDA setting
   and rebuild if the existing binary does not match the current GPU
   mode (e.g. CUDA binary on a now-CPU-only rerun, or vice versa).
---
 studio/setup.ps1 | 47 +++++++++++++++++++++++++++++++++++++++++++++--
 1 file changed, 45 insertions(+), 2 deletions(-)

diff --git a/studio/setup.ps1 b/studio/setup.ps1
index 667460ebd3..c2203fc5ca 100644
--- a/studio/setup.ps1
+++ b/studio/setup.ps1
@@ -251,13 +251,35 @@ Write-Host "+==============================================+" -ForegroundColor G
 # ==========================================================================
 
 # ============================================
-# 1a. GPU requirement check
+# 1a. GPU detection
 # ============================================
 $HasNvidiaSmi = $false
 try {
     nvidia-smi 2>&1 | Out-Null
     if ($LASTEXITCODE -eq 0) { $HasNvidiaSmi = $true }
 } catch {}
+# Fallback: nvidia-smi may not be on PATH even though a GPU + driver exist.
+# Check the default install location and the Windows driver store.
+if (-not $HasNvidiaSmi) {
+    $nvSmiDefaults = @(
+        "$env:ProgramFiles\NVIDIA Corporation\NVSMI\nvidia-smi.exe",
+        "$env:SystemRoot\System32\nvidia-smi.exe"
+    )
+    foreach ($p in $nvSmiDefaults) {
+        if (Test-Path $p) {
+            try {
+                & $p 2>&1 | Out-Null
+                if ($LASTEXITCODE -eq 0) {
+                    $nvSmiDir = Split-Path $p -Parent
+                    $env:Path = "$nvSmiDir;$env:Path"
+                    $HasNvidiaSmi = $true
+                    Write-Host "   Found nvidia-smi at $nvSmiDir (added to PATH)" -ForegroundColor Gray
+                    break
+                }
+            } catch {}
+        }
+    }
+}
 if (-not $HasNvidiaSmi) {
     Write-Host ""
     Write-Host "[WARN] No NVIDIA GPU detected. Studio will run in chat-only (GGUF) mode." -ForegroundColor Yellow
@@ -933,7 +955,11 @@ if ($HasNvidiaSmi) {
     # Install Triton for Windows (enables torch.compile -- without it training can hang)
     Write-Host "   Installing Triton for Windows..." -ForegroundColor Cyan
     pip install "triton-windows<3.7" 2>&1 | Out-Null
-    Write-Host "[OK] Triton for Windows installed (enables torch.compile)" -ForegroundColor Green
+    if ($LASTEXITCODE -ne 0) {
+        Write-Host "[WARN] Triton install failed -- torch.compile may not work" -ForegroundColor Yellow
+    } else {
+        Write-Host "[OK] Triton for Windows installed (enables torch.compile)" -ForegroundColor Green
+    }
 } else {
     Write-Host "   Installing PyTorch (CPU-only)..." -ForegroundColor Cyan
     pip install torch torchvision torchaudio
@@ -1038,7 +1064,24 @@ $LlamaServerBin = Join-Path $BuildDir "bin\Release\llama-server.exe"
 
 $HasCmakeForBuild = $null -ne (Get-Command cmake -ErrorAction SilentlyContinue)
 
+# Check if existing llama-server matches current GPU mode. A CUDA-built binary
+# on a now-CPU-only machine (or vice versa) needs to be rebuilt.
+$NeedRebuild = $false
 if (Test-Path $LlamaServerBin) {
+    $CmakeCacheFile = Join-Path $BuildDir "CMakeCache.txt"
+    if (Test-Path $CmakeCacheFile) {
+        $cachedCuda = Select-String -Path $CmakeCacheFile -Pattern 'GGML_CUDA:BOOL=ON' -Quiet
+        if ($HasNvidiaSmi -and -not $cachedCuda) {
+            Write-Host "   Existing llama-server is CPU-only but GPU is available -- rebuilding" -ForegroundColor Yellow
+            $NeedRebuild = $true
+        } elseif (-not $HasNvidiaSmi -and $cachedCuda) {
+            Write-Host "   Existing llama-server was built with CUDA but no GPU detected -- rebuilding" -ForegroundColor Yellow
+            $NeedRebuild = $true
+        }
+    }
+}
+
+if ((Test-Path $LlamaServerBin) -and -not $NeedRebuild) {
     Write-Host ""
     Write-Host "[OK] llama-server already exists at $LlamaServerBin" -ForegroundColor Green
 } elseif (-not $HasCmakeForBuild) {

From 69d99a44fef4ed95262a90b74481e126410e3ffd Mon Sep 17 00:00:00 2001
From: Daniel Han <daniel@unsloth.ai>
Date: Wed, 18 Mar 2026 07:41:40 +0000
Subject: [PATCH 9/9] Fix nvidia-smi PATH persistence and cmake requirement for
 CPU-only

1. Store nvidia-smi as an absolute path ($NvidiaSmiExe) on first
   detection. All later calls (Get-CudaComputeCapability,
   Get-PytorchCudaTag, CUDA toolkit detection) use this absolute
   path instead of relying on PATH. This survives Refresh-Environment
   which rebuilds PATH from the registry and drops process-only
   additions.

2. Make a missing cmake fatal for CPU-only installs. CPU-only machines
   depend entirely on llama-server for GGUF chat mode, so reporting
   "Setup Complete!" without it is misleading. GPU machines can still
   skip the llama-server build since they have other inference paths.
---
 studio/setup.ps1 | 45 ++++++++++++++++++++++++++++++++-------------
 1 file changed, 32 insertions(+), 13 deletions(-)

diff --git a/studio/setup.ps1 b/studio/setup.ps1
index c2203fc5ca..dac308966f 100644
--- a/studio/setup.ps1
+++ b/studio/setup.ps1
@@ -107,11 +107,15 @@ function Find-Nvcc {
 # Returns e.g. "80" for A100 (8.0), "89" for RTX 4090 (8.9), etc.
 # Returns $null if detection fails.
 function Get-CudaComputeCapability {
-    $nvSmi = Get-Command nvidia-smi -ErrorAction SilentlyContinue
-    if (-not $nvSmi) { return $null }
+    # Use the resolved absolute path ($NvidiaSmiExe) to survive Refresh-Environment
+    $smiExe = if ($script:NvidiaSmiExe) { $script:NvidiaSmiExe } else {
+        $cmd = Get-Command nvidia-smi -ErrorAction SilentlyContinue
+        if ($cmd) { $cmd.Source } else { $null }
+    }
+    if (-not $smiExe) { return $null }
 
     try {
-        $raw = & nvidia-smi --query-gpu=compute_cap --format=csv,noheader 2>$null
+        $raw = & $smiExe --query-gpu=compute_cap --format=csv,noheader 2>$null
         if ($LASTEXITCODE -ne 0 -or -not $raw) { return $null }
 
         # nvidia-smi may return multiple GPUs; take the first one
@@ -168,14 +172,17 @@ function Get-NvccMaxArch {
 # https://download.pytorch.org/whl/<tag>. The tag must not exceed the driver's
 # capability: e.g. driver "CUDA Version: 12.9" → cu128 (not cu130).
 function Get-PytorchCudaTag {
-    $nvSmi = Get-Command nvidia-smi -ErrorAction SilentlyContinue
-    if (-not $nvSmi) { return "cu124" }
+    $smiExe = if ($script:NvidiaSmiExe) { $script:NvidiaSmiExe } else {
+        $cmd = Get-Command nvidia-smi -ErrorAction SilentlyContinue
+        if ($cmd) { $cmd.Source } else { $null }
+    }
+    if (-not $smiExe) { return "cu124" }
 
     try {
         # 2>&1 | Out-String merges stderr into stdout then converts to a single
-        # string.  Plain 2>$null doesn't fully suppress stderr in PS 5.1 —
+        # string.  Plain 2>$null doesn't fully suppress stderr in PS 5.1 --
         # ErrorRecord objects leak into $output and break the -match.
-        $output = & nvidia-smi 2>&1 | Out-String
+        $output = & $smiExe 2>&1 | Out-String
         if ($output -match 'CUDA Version:\s+(\d+)\.(\d+)') {
             $major = [int]$Matches[1]
             $minor = [int]$Matches[2]
@@ -254,9 +261,16 @@ Write-Host "+==============================================+" -ForegroundColor G
 # 1a. GPU detection
 # ============================================
 $HasNvidiaSmi = $false
+$NvidiaSmiExe = $null  # Absolute path -- survives Refresh-Environment
 try {
-    nvidia-smi 2>&1 | Out-Null
-    if ($LASTEXITCODE -eq 0) { $HasNvidiaSmi = $true }
+    $nvSmiCmd = Get-Command nvidia-smi -ErrorAction SilentlyContinue
+    if ($nvSmiCmd) {
+        & $nvSmiCmd.Source 2>&1 | Out-Null
+        if ($LASTEXITCODE -eq 0) {
+            $HasNvidiaSmi = $true
+            $NvidiaSmiExe = $nvSmiCmd.Source
+        }
+    }
 } catch {}
 # Fallback: nvidia-smi may not be on PATH even though a GPU + driver exist.
 # Check the default install location and the Windows driver store.
@@ -270,10 +284,9 @@ if (-not $HasNvidiaSmi) {
             try {
                 & $p 2>&1 | Out-Null
                 if ($LASTEXITCODE -eq 0) {
-                    $nvSmiDir = Split-Path $p -Parent
-                    $env:Path = "$nvSmiDir;$env:Path"
                     $HasNvidiaSmi = $true
-                    Write-Host "   Found nvidia-smi at $nvSmiDir (added to PATH)" -ForegroundColor Gray
+                    $NvidiaSmiExe = $p
+                    Write-Host "   Found nvidia-smi at $(Split-Path $p -Parent)" -ForegroundColor Gray
                     break
                 }
             } catch {}
@@ -443,7 +456,7 @@ if ($HasNvidiaSmi) {
 # -- Detect max CUDA version the driver supports --
 $DriverMaxCuda = $null
 try {
-    $smiOut = nvidia-smi 2>&1 | Out-String
+    $smiOut = & $NvidiaSmiExe 2>&1 | Out-String
     if ($smiOut -match "CUDA Version:\s+([\d]+)\.([\d]+)") {
         $DriverMaxCuda = "$($Matches[1]).$($Matches[2])"
         Write-Host "   Driver supports up to CUDA $DriverMaxCuda" -ForegroundColor Gray
@@ -1086,6 +1099,12 @@ if ((Test-Path $LlamaServerBin) -and -not $NeedRebuild) {
     Write-Host "[OK] llama-server already exists at $LlamaServerBin" -ForegroundColor Green
 } elseif (-not $HasCmakeForBuild) {
     Write-Host ""
+    if (-not $HasNvidiaSmi) {
+        # CPU-only machines depend entirely on llama-server for GGUF chat -- cmake is required
+        Write-Host "[ERROR] CMake is required to build llama-server for GGUF chat mode." -ForegroundColor Red
+        Write-Host "        Install CMake from https://cmake.org/download/ and re-run setup." -ForegroundColor Yellow
+        exit 1
+    }
     Write-Host "[SKIP] llama-server build -- cmake not available" -ForegroundColor Yellow
     Write-Host "       GGUF inference and export will not be available." -ForegroundColor Yellow
     Write-Host "       Install CMake from https://cmake.org/download/ and re-run setup." -ForegroundColor Yellow