From ad9d622124918dacc548e3c0b46de040a79bda2e Mon Sep 17 00:00:00 2001 From: Daniel Han Date: Wed, 18 Mar 2026 04:33:56 +0000 Subject: [PATCH 1/9] Allow Windows setup to complete without NVIDIA GPU setup.ps1 previously hard-exited if nvidia-smi was not found, blocking setup entirely on CPU-only or non-NVIDIA machines. The backend already supports CPU and MLX (Apple Silicon) in chat-only GGUF mode, and the Linux/Mac setup.sh handles missing GPUs gracefully. Changes: - Convert the GPU check from a hard exit to a warning - Guard CUDA toolkit installation behind $HasNvidiaSmi - Install CPU-only PyTorch when no GPU is detected - Build llama.cpp without CUDA flags when no GPU is present - Update doc comment to reflect CPU support --- studio/setup.ps1 | 48 ++++++++++++++++++++++++++++++++---------------- 1 file changed, 32 insertions(+), 16 deletions(-) diff --git a/studio/setup.ps1 b/studio/setup.ps1 index 2420448deb..4592f6f0af 100644 --- a/studio/setup.ps1 +++ b/studio/setup.ps1 @@ -8,7 +8,7 @@ Always installs Node.js if needed. When running from pip install: skips frontend build (already bundled). When running from git repo: full setup including frontend build. - Requires an NVIDIA GPU -- CPU-only machines are not supported. + Supports NVIDIA GPU (full training + inference) and CPU-only (GGUF chat mode). .NOTES Usage: powershell -ExecutionPolicy Bypass -File setup.ps1 #> @@ -260,14 +260,13 @@ try { } catch {} if (-not $HasNvidiaSmi) { Write-Host "" - Write-Host "[ERROR] Unsloth Studio requires an NVIDIA GPU." -ForegroundColor Red - Write-Host " CPU-only machines are not supported." -ForegroundColor Red + Write-Host "[WARN] No NVIDIA GPU detected. Studio will run in chat-only (GGUF) mode." -ForegroundColor Yellow + Write-Host " Training and GPU inference require an NVIDIA GPU with drivers installed." 
-ForegroundColor Yellow + Write-Host " https://www.nvidia.com/Download/index.aspx" -ForegroundColor Yellow Write-Host "" - Write-Host " If you have an NVIDIA GPU, ensure the driver is installed:" -ForegroundColor Yellow - Write-Host " https://www.nvidia.com/Download/index.aspx" -ForegroundColor Yellow - exit 1 +} else { + Write-Host "[OK] NVIDIA GPU detected" -ForegroundColor Green } -Write-Host "[OK] NVIDIA GPU detected" -ForegroundColor Green # ============================================ # 1a.5. Windows Long Paths (required for deep node_modules / Python paths) @@ -389,6 +388,7 @@ if ($vsResult) { # ============================================ # 1e. CUDA Toolkit (nvcc for llama.cpp build + env vars) # ============================================ +if ($HasNvidiaSmi) { # IMPORTANT: The CUDA Toolkit version must be <= the max CUDA version the # NVIDIA driver supports. nvidia-smi reports this as "CUDA Version: X.Y". # If we install a toolkit newer than the driver supports, llama-server will @@ -643,6 +643,9 @@ Write-Host " CudaToolkitDir = $CudaToolkitRoot\" -ForegroundColor Gray if (-not $CudaArch) { Write-Host " [WARN] Could not detect compute capability -- cmake will use defaults" -ForegroundColor Yellow } +} else { + Write-Host "[SKIP] CUDA Toolkit -- no NVIDIA GPU detected" -ForegroundColor Yellow +} # ============================================ # 1f. Node.js / npm (skip if pip-installed -- only needed for frontend build) @@ -880,14 +883,21 @@ $env:TORCHINDUCTOR_CACHE_DIR = $TorchCacheDir [Environment]::SetEnvironmentVariable('TORCHINDUCTOR_CACHE_DIR', $TorchCacheDir, 'User') Write-Host "[OK] TORCHINDUCTOR_CACHE_DIR set to $TorchCacheDir (avoids MAX_PATH issues)" -ForegroundColor Green -$CuTag = Get-PytorchCudaTag -Write-Host " Installing PyTorch with CUDA support ($CuTag)..." 
-ForegroundColor Cyan -pip install torch torchvision torchaudio --index-url "https://download.pytorch.org/whl/$CuTag" 2>&1 | Out-Null +if ($HasNvidiaSmi) { + $CuTag = Get-PytorchCudaTag + Write-Host " Installing PyTorch with CUDA support ($CuTag)..." -ForegroundColor Cyan + pip install torch torchvision torchaudio --index-url "https://download.pytorch.org/whl/$CuTag" 2>&1 | Out-Null -# Install Triton for Windows (enables torch.compile — without it training can hang) -Write-Host " Installing Triton for Windows..." -ForegroundColor Cyan -pip install "triton-windows<3.7" 2>&1 | Out-Null -Write-Host "[OK] Triton for Windows installed (enables torch.compile)" -ForegroundColor Green + # Install Triton for Windows (enables torch.compile -- without it training can hang) + Write-Host " Installing Triton for Windows..." -ForegroundColor Cyan + pip install "triton-windows<3.7" 2>&1 | Out-Null +} else { + Write-Host " Installing PyTorch (CPU-only)..." -ForegroundColor Cyan + pip install torch torchvision torchaudio 2>&1 | Out-Null +} +if ($HasNvidiaSmi) { + Write-Host "[OK] Triton for Windows installed (enables torch.compile)" -ForegroundColor Green +} # Ordered heavy dependency installation — shared cross-platform script Write-Host " Running ordered dependency installation..." -ForegroundColor Cyan @@ -987,7 +997,11 @@ if (Test-Path $LlamaServerBin) { Write-Host "[OK] llama-server already exists at $LlamaServerBin" -ForegroundColor Green } else { Write-Host "" - Write-Host "Building llama.cpp with CUDA support..." -ForegroundColor Cyan + if ($HasNvidiaSmi) { + Write-Host "Building llama.cpp with CUDA support..." -ForegroundColor Cyan + } else { + Write-Host "Building llama.cpp (CPU-only, no NVIDIA GPU detected)..." -ForegroundColor Cyan + } Write-Host " This typically takes 5-10 minutes on first build." 
-ForegroundColor Gray Write-Host "" @@ -1066,7 +1080,8 @@ if (Test-Path $LlamaServerBin) { $CmakeArgs += '-DLLAMA_CURL=OFF' } $CmakeArgs += '-DCMAKE_EXE_LINKER_FLAGS=/NODEFAULTLIB:LIBCMT' - # CUDA flags (Unsloth-aligned) + # CUDA flags (Unsloth-aligned) -- only if GPU available + if ($HasNvidiaSmi -and $NvccPath) { $CmakeArgs += '-DGGML_CUDA=ON' $CmakeArgs += "-DCUDAToolkit_ROOT=$CudaToolkitRoot" $CmakeArgs += "-DCUDA_TOOLKIT_ROOT_DIR=$CudaToolkitRoot" @@ -1092,6 +1107,7 @@ if (Test-Path $LlamaServerBin) { # else: omit flag entirely, let cmake pick defaults } } + } cmake @CmakeArgs 2>&1 | Out-Null if ($LASTEXITCODE -ne 0) { From a7d4e8860f53b3053e8f742c471073439651ca09 Mon Sep 17 00:00:00 2001 From: Daniel Han <23090290+danielhanchen@users.noreply.github.com> Date: Wed, 18 Mar 2026 05:33:10 +0000 Subject: [PATCH 2/9] Show pip progress for PyTorch download on Windows The torch CUDA wheel is ~2.8 GB and the CPU wheel is ~300 MB. With | Out-Null suppressing all output, the install appeared completely frozen with no feedback. Remove | Out-Null for the torch install lines so pip's download progress bar is visible. Add a size hint so users know the download is expected to take a while. Also moves the Triton success message inside the GPU branch so it only prints when Triton was actually installed. --- studio/setup.ps1 | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/studio/setup.ps1 b/studio/setup.ps1 index 4592f6f0af..c0dde94100 100644 --- a/studio/setup.ps1 +++ b/studio/setup.ps1 @@ -886,17 +886,16 @@ Write-Host "[OK] TORCHINDUCTOR_CACHE_DIR set to $TorchCacheDir (avoids MAX_PATH if ($HasNvidiaSmi) { $CuTag = Get-PytorchCudaTag Write-Host " Installing PyTorch with CUDA support ($CuTag)..." 
-ForegroundColor Cyan - pip install torch torchvision torchaudio --index-url "https://download.pytorch.org/whl/$CuTag" 2>&1 | Out-Null + Write-Host " (This download is ~2.8 GB -- may take a few minutes)" -ForegroundColor Gray + pip install torch torchvision torchaudio --index-url "https://download.pytorch.org/whl/$CuTag" # Install Triton for Windows (enables torch.compile -- without it training can hang) Write-Host " Installing Triton for Windows..." -ForegroundColor Cyan pip install "triton-windows<3.7" 2>&1 | Out-Null + Write-Host "[OK] Triton for Windows installed (enables torch.compile)" -ForegroundColor Green } else { Write-Host " Installing PyTorch (CPU-only)..." -ForegroundColor Cyan - pip install torch torchvision torchaudio 2>&1 | Out-Null -} -if ($HasNvidiaSmi) { - Write-Host "[OK] Triton for Windows installed (enables torch.compile)" -ForegroundColor Green + pip install torch torchvision torchaudio } # Ordered heavy dependency installation — shared cross-platform script From c53f5298caa7783df11cf9279afc66890664ae96 Mon Sep 17 00:00:00 2001 From: Daniel Han <23090290+danielhanchen@users.noreply.github.com> Date: Wed, 18 Mar 2026 05:36:54 +0000 Subject: [PATCH 3/9] Guard CUDA env re-sanitization behind GPU check in llama.cpp build The CUDA_PATH re-sanitization block (lines 1020-1033) references $CudaToolkitRoot which is only set when $HasNvidiaSmi is true and the CUDA Toolkit section runs. On CPU-only machines, $CudaToolkitRoot is null, causing Split-Path to throw: Split-Path : Cannot bind argument to parameter 'Path' because it is null. Wrap the entire block in `if ($HasNvidiaSmi -and $CudaToolkitRoot)`. 
--- studio/setup.ps1 | 22 ++++++++++++---------- 1 file changed, 12 insertions(+), 10 deletions(-) diff --git a/studio/setup.ps1 b/studio/setup.ps1 index c0dde94100..8ad4009bac 100644 --- a/studio/setup.ps1 +++ b/studio/setup.ps1 @@ -1020,17 +1020,19 @@ if (Test-Path $LlamaServerBin) { # Re-sanitize CUDA_PATH_V* vars — Refresh-Environment (called during # Node/Python installs above) may have repopulated conflicting versioned # vars from the Machine registry. - $cudaPathVars2 = @([Environment]::GetEnvironmentVariables('Process').Keys | Where-Object { $_ -match '^CUDA_PATH_V' }) - foreach ($v2 in $cudaPathVars2) { - [Environment]::SetEnvironmentVariable($v2, $null, 'Process') - } - $tkDirName2 = Split-Path $CudaToolkitRoot -Leaf - if ($tkDirName2 -match '^v(\d+)\.(\d+)') { - [Environment]::SetEnvironmentVariable("CUDA_PATH_V$($Matches[1])_$($Matches[2])", $CudaToolkitRoot, 'Process') + if ($HasNvidiaSmi -and $CudaToolkitRoot) { + $cudaPathVars2 = @([Environment]::GetEnvironmentVariables('Process').Keys | Where-Object { $_ -match '^CUDA_PATH_V' }) + foreach ($v2 in $cudaPathVars2) { + [Environment]::SetEnvironmentVariable($v2, $null, 'Process') + } + $tkDirName2 = Split-Path $CudaToolkitRoot -Leaf + if ($tkDirName2 -match '^v(\d+)\.(\d+)') { + [Environment]::SetEnvironmentVariable("CUDA_PATH_V$($Matches[1])_$($Matches[2])", $CudaToolkitRoot, 'Process') + } + # Also re-assert CUDA_PATH and CudaToolkitDir in case they were overwritten + [Environment]::SetEnvironmentVariable('CUDA_PATH', $CudaToolkitRoot, 'Process') + [Environment]::SetEnvironmentVariable('CudaToolkitDir', "$CudaToolkitRoot\", 'Process') } - # Also re-assert CUDA_PATH and CudaToolkitDir in case they were overwritten - [Environment]::SetEnvironmentVariable('CUDA_PATH', $CudaToolkitRoot, 'Process') - [Environment]::SetEnvironmentVariable('CudaToolkitDir', "$CudaToolkitRoot\", 'Process') # -- Step A: Clone or pull llama.cpp -- From be61a3435caa0489c9af43cb462814edb585ccf2 Mon Sep 17 00:00:00 2001 From: 
Daniel Han <23090290+danielhanchen@users.noreply.github.com> Date: Wed, 18 Mar 2026 05:50:35 +0000 Subject: [PATCH 4/9] Fix cmake not found on Windows after winget install Two issues fixed: 1. After winget installs cmake, Refresh-Environment may not pick up the new PATH entry (MSI PATH changes sometimes need a new shell). Added a fallback that probes cmake's default install locations (Program Files, LocalAppData) and adds the directory to PATH explicitly if found. 2. If cmake is still unavailable when the llama.cpp build starts (e.g. winget failed silently or PATH was not updated), the build now skips gracefully with a [SKIP] warning instead of crashing with "cmake : The term 'cmake' is not recognized". --- studio/setup.ps1 | 26 ++++++++++++++++++++++++++ 1 file changed, 26 insertions(+) diff --git a/studio/setup.ps1 b/studio/setup.ps1 index 8ad4009bac..33aeceef65 100644 --- a/studio/setup.ps1 +++ b/studio/setup.ps1 @@ -340,6 +340,25 @@ if (-not $HasCmake) { $HasCmake = $null -ne (Get-Command cmake -ErrorAction SilentlyContinue) } catch { } } + # winget may succeed but cmake isn't on PATH yet (MSI PATH changes need a + # new shell). Try the default install location as a fallback. 
+ if (-not $HasCmake) { + $cmakeDefaults = @( + "$env:ProgramFiles\CMake\bin", + "${env:ProgramFiles(x86)}\CMake\bin", + "$env:LOCALAPPDATA\CMake\bin" + ) + foreach ($d in $cmakeDefaults) { + if (Test-Path (Join-Path $d "cmake.exe")) { + $env:Path = "$d;$env:Path" + $HasCmake = $null -ne (Get-Command cmake -ErrorAction SilentlyContinue) + if ($HasCmake) { + Write-Host " Found cmake at $d (added to PATH)" -ForegroundColor Gray + break + } + } + } + } if ($HasCmake) { Write-Host "[OK] CMake installed" -ForegroundColor Green } else { @@ -991,9 +1010,16 @@ $LlamaCppDir = Join-Path $UnslothHome "llama.cpp" $BuildDir = Join-Path $LlamaCppDir "build" $LlamaServerBin = Join-Path $BuildDir "bin\Release\llama-server.exe" +$HasCmakeForBuild = $null -ne (Get-Command cmake -ErrorAction SilentlyContinue) + if (Test-Path $LlamaServerBin) { Write-Host "" Write-Host "[OK] llama-server already exists at $LlamaServerBin" -ForegroundColor Green +} elseif (-not $HasCmakeForBuild) { + Write-Host "" + Write-Host "[SKIP] llama-server build -- cmake not available" -ForegroundColor Yellow + Write-Host " GGUF inference and export will not be available." -ForegroundColor Yellow + Write-Host " Install CMake from https://cmake.org/download/ and re-run setup." -ForegroundColor Yellow } else { Write-Host "" if ($HasNvidiaSmi) { From c2f12229407b7c569b51aad3e960a7a13cc51203 Mon Sep 17 00:00:00 2001 From: Daniel Han <23090290+danielhanchen@users.noreply.github.com> Date: Wed, 18 Mar 2026 06:08:53 +0000 Subject: [PATCH 5/9] Show cmake errors on failure and retry CUDA VS integration with elevation Two fixes for issue #4405 (Windows setup fails at cmake configure): 1. cmake configure: capture output and display it on failure instead of piping to Out-Null. When the error mentions "No CUDA toolset found", print a hint about the CUDA VS integration files. 2. 
CUDA VS integration copy: when the direct Copy-Item fails (needs admin access to write to Program Files), retry with Start-Process -Verb RunAs to prompt for elevation. This is the root cause of the "No CUDA toolset found" cmake failure -- the .targets files that let MSBuild compile .cu files are missing from the VS BuildCustomizations directory. --- studio/setup.ps1 | 34 ++++++++++++++++++++++++++++------ 1 file changed, 28 insertions(+), 6 deletions(-) diff --git a/studio/setup.ps1 b/studio/setup.ps1 index 33aeceef65..bd8f225db7 100644 --- a/studio/setup.ps1 +++ b/studio/setup.ps1 @@ -643,11 +643,24 @@ if ($VsInstallPath -and $CudaToolkitRoot) { Copy-Item "$cudaExtras\*" $vsCustomizations -Force -ErrorAction Stop Write-Host " [OK] CUDA VS integration files installed" -ForegroundColor Green } catch { - Write-Host " [WARN] Could not copy CUDA VS integration files (may need admin)" -ForegroundColor Yellow - Write-Host " Manual fix: copy contents of" -ForegroundColor Yellow - Write-Host " $cudaExtras" -ForegroundColor Cyan - Write-Host " into:" -ForegroundColor Yellow - Write-Host " $vsCustomizations" -ForegroundColor Cyan + # Direct copy failed (needs admin). Try elevated copy via Start-Process. + try { + $copyCmd = "Copy-Item '$cudaExtras\*' '$vsCustomizations' -Force" + Start-Process powershell -ArgumentList "-NoProfile -Command $copyCmd" -Verb RunAs -Wait -ErrorAction Stop + $hasTargetsRetry = Get-ChildItem $vsCustomizations -Filter "CUDA *.targets" -ErrorAction SilentlyContinue + if ($hasTargetsRetry) { + Write-Host " [OK] CUDA VS integration files installed (elevated)" -ForegroundColor Green + } else { + throw "Copy did not produce .targets files" + } + } catch { + Write-Host " [WARN] Could not copy CUDA VS integration files" -ForegroundColor Yellow + Write-Host " The llama.cpp build may fail with 'No CUDA toolset found'." 
-ForegroundColor Yellow + Write-Host " Manual fix: copy contents of" -ForegroundColor Yellow + Write-Host " $cudaExtras" -ForegroundColor Cyan + Write-Host " into:" -ForegroundColor Yellow + Write-Host " $vsCustomizations" -ForegroundColor Cyan + } } } } @@ -1136,10 +1149,19 @@ if (Test-Path $LlamaServerBin) { } } - cmake @CmakeArgs 2>&1 | Out-Null + $cmakeOutput = cmake @CmakeArgs 2>&1 | Out-String if ($LASTEXITCODE -ne 0) { $BuildOk = $false $FailedStep = "cmake configure" + Write-Host $cmakeOutput -ForegroundColor Red + if ($cmakeOutput -match 'No CUDA toolset found|CUDA_TOOLKIT_ROOT_DIR|nvcc') { + Write-Host "" + Write-Host " Hint: CUDA VS integration may be missing. Try running as admin:" -ForegroundColor Yellow + Write-Host " Copy contents of:" -ForegroundColor Yellow + Write-Host " \extras\visual_studio_integration\MSBuildExtensions" -ForegroundColor Yellow + Write-Host " into:" -ForegroundColor Yellow + Write-Host " \MSBuild\Microsoft\VC\v170\BuildCustomizations" -ForegroundColor Yellow + } } } From 4f1e9df5ff8804516f4a88f18d81e361fc4b59f5 Mon Sep 17 00:00:00 2001 From: Daniel Han <23090290+danielhanchen@users.noreply.github.com> Date: Wed, 18 Mar 2026 06:12:27 +0000 Subject: [PATCH 6/9] Address reviewer feedback: cmake PATH persistence, stale cache, torch error check 1. Persist cmake PATH to user registry so Refresh-Environment cannot drop it later in the same setup run. Previously the process-only PATH addition at phase 1 could vanish when Refresh-Environment rebuilt PATH from registry during phase 2/3 installs. 2. Clean stale CMake cache before configure. If a previous run built with CUDA and the user reruns without a GPU (or vice versa), the cached GGML_CUDA value would persist. Now the build dir is removed before configure. 3. Explicitly set -DGGML_CUDA=OFF for CPU-only builds instead of just omitting CUDA flags. This prevents cmake from auto-detecting a partial CUDA installation. 4. 
Fix CUDA cmake flag indentation -- was misaligned from the original PR, now consistently indented inside the if/else block. 5. Fail hard if pip install torch returns a non-zero exit code instead of silently continuing with a broken environment. --- studio/setup.ps1 | 72 +++++++++++++++++++++++++++++++----------------- 1 file changed, 47 insertions(+), 25 deletions(-) diff --git a/studio/setup.ps1 b/studio/setup.ps1 index bd8f225db7..87bc43b87f 100644 --- a/studio/setup.ps1 +++ b/studio/setup.ps1 @@ -351,6 +351,11 @@ if (-not $HasCmake) { foreach ($d in $cmakeDefaults) { if (Test-Path (Join-Path $d "cmake.exe")) { $env:Path = "$d;$env:Path" + # Persist to user PATH so Refresh-Environment does not drop it later + $userPath = [Environment]::GetEnvironmentVariable('Path', 'User') + if (-not $userPath -or $userPath -notlike "*$d*") { + [Environment]::SetEnvironmentVariable('Path', "$d;$userPath", 'User') + } $HasCmake = $null -ne (Get-Command cmake -ErrorAction SilentlyContinue) if ($HasCmake) { Write-Host " Found cmake at $d (added to PATH)" -ForegroundColor Gray @@ -920,6 +925,10 @@ if ($HasNvidiaSmi) { Write-Host " Installing PyTorch with CUDA support ($CuTag)..." -ForegroundColor Cyan Write-Host " (This download is ~2.8 GB -- may take a few minutes)" -ForegroundColor Gray pip install torch torchvision torchaudio --index-url "https://download.pytorch.org/whl/$CuTag" + if ($LASTEXITCODE -ne 0) { + Write-Host "[FAILED] PyTorch CUDA install failed (exit code $LASTEXITCODE)" -ForegroundColor Red + exit 1 + } # Install Triton for Windows (enables torch.compile -- without it training can hang) Write-Host " Installing Triton for Windows..." -ForegroundColor Cyan @@ -928,6 +937,10 @@ if ($HasNvidiaSmi) { } else { Write-Host " Installing PyTorch (CPU-only)..." 
-ForegroundColor Cyan pip install torch torchvision torchaudio + if ($LASTEXITCODE -ne 0) { + Write-Host "[FAILED] PyTorch install failed (exit code $LASTEXITCODE)" -ForegroundColor Red + exit 1 + } } # Ordered heavy dependency installation — shared cross-platform script @@ -1091,7 +1104,14 @@ if (Test-Path $LlamaServerBin) { } } - # -- Step B: cmake configure (CUDA + Unsloth flags) -- + # -- Step B: cmake configure -- + # Clean stale CMake cache to prevent previous CUDA settings from leaking + # into a CPU-only rebuild (or vice versa). + $CmakeCacheFile = Join-Path $BuildDir "CMakeCache.txt" + if (Test-Path $CmakeCacheFile) { + Remove-Item -Recurse -Force $BuildDir + } + if ($BuildOk) { Write-Host "" Write-Host "--- cmake configure ---" -ForegroundColor Cyan @@ -1120,33 +1140,35 @@ if (Test-Path $LlamaServerBin) { $CmakeArgs += '-DLLAMA_CURL=OFF' } $CmakeArgs += '-DCMAKE_EXE_LINKER_FLAGS=/NODEFAULTLIB:LIBCMT' - # CUDA flags (Unsloth-aligned) -- only if GPU available + # CUDA flags -- only if GPU available, otherwise explicitly disable if ($HasNvidiaSmi -and $NvccPath) { - $CmakeArgs += '-DGGML_CUDA=ON' - $CmakeArgs += "-DCUDAToolkit_ROOT=$CudaToolkitRoot" - $CmakeArgs += "-DCUDA_TOOLKIT_ROOT_DIR=$CudaToolkitRoot" - $CmakeArgs += "-DCMAKE_CUDA_COMPILER=$NvccPath" - $CmakeArgs += '-DGGML_CUDA_FA_ALL_QUANTS=ON' - $CmakeArgs += '-DGGML_CUDA_F16=OFF' - $CmakeArgs += '-DGGML_CUDA_GRAPHS=OFF' - $CmakeArgs += '-DGGML_CUDA_FORCE_CUBLAS=OFF' - $CmakeArgs += '-DGGML_CUDA_PEER_MAX_BATCH_SIZE=8192' - if ($CudaArch) { - # Validate nvcc actually supports this architecture - if (Test-NvccArchSupport -NvccExe $NvccPath -Arch $CudaArch) { - $CmakeArgs += "-DCMAKE_CUDA_ARCHITECTURES=$CudaArch" - } else { - # GPU arch too new for this toolkit — fall back to highest supported. - # PTX forward-compatibility will JIT-compile for the actual GPU at runtime. 
- $maxArch = Get-NvccMaxArch -NvccExe $NvccPath - if ($maxArch) { - $CmakeArgs += "-DCMAKE_CUDA_ARCHITECTURES=$maxArch" - Write-Host " [WARN] GPU is sm_$CudaArch but nvcc only supports up to sm_$maxArch" -ForegroundColor Yellow - Write-Host " Building with sm_$maxArch (PTX will JIT for your GPU at runtime)" -ForegroundColor Yellow + $CmakeArgs += '-DGGML_CUDA=ON' + $CmakeArgs += "-DCUDAToolkit_ROOT=$CudaToolkitRoot" + $CmakeArgs += "-DCUDA_TOOLKIT_ROOT_DIR=$CudaToolkitRoot" + $CmakeArgs += "-DCMAKE_CUDA_COMPILER=$NvccPath" + $CmakeArgs += '-DGGML_CUDA_FA_ALL_QUANTS=ON' + $CmakeArgs += '-DGGML_CUDA_F16=OFF' + $CmakeArgs += '-DGGML_CUDA_GRAPHS=OFF' + $CmakeArgs += '-DGGML_CUDA_FORCE_CUBLAS=OFF' + $CmakeArgs += '-DGGML_CUDA_PEER_MAX_BATCH_SIZE=8192' + if ($CudaArch) { + # Validate nvcc actually supports this architecture + if (Test-NvccArchSupport -NvccExe $NvccPath -Arch $CudaArch) { + $CmakeArgs += "-DCMAKE_CUDA_ARCHITECTURES=$CudaArch" + } else { + # GPU arch too new for this toolkit -- fall back to highest supported. + # PTX forward-compatibility will JIT-compile for the actual GPU at runtime. 
+ $maxArch = Get-NvccMaxArch -NvccExe $NvccPath + if ($maxArch) { + $CmakeArgs += "-DCMAKE_CUDA_ARCHITECTURES=$maxArch" + Write-Host " [WARN] GPU is sm_$CudaArch but nvcc only supports up to sm_$maxArch" -ForegroundColor Yellow + Write-Host " Building with sm_$maxArch (PTX will JIT for your GPU at runtime)" -ForegroundColor Yellow + } + # else: omit flag entirely, let cmake pick defaults } - # else: omit flag entirely, let cmake pick defaults } - } + } else { + $CmakeArgs += '-DGGML_CUDA=OFF' } $cmakeOutput = cmake @CmakeArgs 2>&1 | Out-String From 7125772c0adff14176e201d0da1d5f4bca3c03db Mon Sep 17 00:00:00 2001 From: Daniel Han <23090290+danielhanchen@users.noreply.github.com> Date: Wed, 18 Mar 2026 06:16:37 +0000 Subject: [PATCH 7/9] Remove extra CUDA cmake flags to align Windows with Linux build Drop GGML_CUDA_FA_ALL_QUANTS, GGML_CUDA_F16, GGML_CUDA_GRAPHS, GGML_CUDA_FORCE_CUBLAS, and GGML_CUDA_PEER_MAX_BATCH_SIZE flags. The Linux build in setup.sh only sets GGML_CUDA=ON and lets llama.cpp use its defaults for everything else. Keep Windows consistent. 
--- studio/setup.ps1 | 5 ----- 1 file changed, 5 deletions(-) diff --git a/studio/setup.ps1 b/studio/setup.ps1 index 87bc43b87f..667460ebd3 100644 --- a/studio/setup.ps1 +++ b/studio/setup.ps1 @@ -1146,11 +1146,6 @@ if (Test-Path $LlamaServerBin) { $CmakeArgs += "-DCUDAToolkit_ROOT=$CudaToolkitRoot" $CmakeArgs += "-DCUDA_TOOLKIT_ROOT_DIR=$CudaToolkitRoot" $CmakeArgs += "-DCMAKE_CUDA_COMPILER=$NvccPath" - $CmakeArgs += '-DGGML_CUDA_FA_ALL_QUANTS=ON' - $CmakeArgs += '-DGGML_CUDA_F16=OFF' - $CmakeArgs += '-DGGML_CUDA_GRAPHS=OFF' - $CmakeArgs += '-DGGML_CUDA_FORCE_CUBLAS=OFF' - $CmakeArgs += '-DGGML_CUDA_PEER_MAX_BATCH_SIZE=8192' if ($CudaArch) { # Validate nvcc actually supports this architecture if (Test-NvccArchSupport -NvccExe $NvccPath -Arch $CudaArch) { From 8035e9caf90e27d73f280bf78f2f410b8efd2252 Mon Sep 17 00:00:00 2001 From: Daniel Han <23090290+danielhanchen@users.noreply.github.com> Date: Wed, 18 Mar 2026 06:28:33 +0000 Subject: [PATCH 8/9] Address reviewer round 2: GPU probe fallback, Triton check, stale binary rebuild 1. GPU detection: fall back to default nvidia-smi install locations (Program Files\NVIDIA Corporation\NVSMI, System32) when nvidia-smi is not on PATH. Prevents silent CPU-only provisioning on machines that have a GPU but a broken PATH. 2. Triton: check $LASTEXITCODE after pip install and print [WARN] on failure instead of unconditional [OK]. 3. Stale llama-server: check CMakeCache.txt for GGML_CUDA setting and rebuild if the existing binary does not match the current GPU mode (e.g. CUDA binary on a now-CPU-only rerun, or vice versa).
--- studio/setup.ps1 | 47 +++++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 45 insertions(+), 2 deletions(-) diff --git a/studio/setup.ps1 b/studio/setup.ps1 index 667460ebd3..c2203fc5ca 100644 --- a/studio/setup.ps1 +++ b/studio/setup.ps1 @@ -251,13 +251,35 @@ Write-Host "+==============================================+" -ForegroundColor G # ========================================================================== # ============================================ -# 1a. GPU requirement check +# 1a. GPU detection # ============================================ $HasNvidiaSmi = $false try { nvidia-smi 2>&1 | Out-Null if ($LASTEXITCODE -eq 0) { $HasNvidiaSmi = $true } } catch {} +# Fallback: nvidia-smi may not be on PATH even though a GPU + driver exist. +# Check the default install location and the Windows driver store. +if (-not $HasNvidiaSmi) { + $nvSmiDefaults = @( + "$env:ProgramFiles\NVIDIA Corporation\NVSMI\nvidia-smi.exe", + "$env:SystemRoot\System32\nvidia-smi.exe" + ) + foreach ($p in $nvSmiDefaults) { + if (Test-Path $p) { + try { + & $p 2>&1 | Out-Null + if ($LASTEXITCODE -eq 0) { + $nvSmiDir = Split-Path $p -Parent + $env:Path = "$nvSmiDir;$env:Path" + $HasNvidiaSmi = $true + Write-Host " Found nvidia-smi at $nvSmiDir (added to PATH)" -ForegroundColor Gray + break + } + } catch {} + } + } +} if (-not $HasNvidiaSmi) { Write-Host "" Write-Host "[WARN] No NVIDIA GPU detected. Studio will run in chat-only (GGUF) mode." -ForegroundColor Yellow @@ -933,7 +955,11 @@ if ($HasNvidiaSmi) { # Install Triton for Windows (enables torch.compile -- without it training can hang) Write-Host " Installing Triton for Windows..." 
-ForegroundColor Cyan pip install "triton-windows<3.7" 2>&1 | Out-Null - Write-Host "[OK] Triton for Windows installed (enables torch.compile)" -ForegroundColor Green + if ($LASTEXITCODE -ne 0) { + Write-Host "[WARN] Triton install failed -- torch.compile may not work" -ForegroundColor Yellow + } else { + Write-Host "[OK] Triton for Windows installed (enables torch.compile)" -ForegroundColor Green + } } else { Write-Host " Installing PyTorch (CPU-only)..." -ForegroundColor Cyan pip install torch torchvision torchaudio @@ -1038,7 +1064,24 @@ $LlamaServerBin = Join-Path $BuildDir "bin\Release\llama-server.exe" $HasCmakeForBuild = $null -ne (Get-Command cmake -ErrorAction SilentlyContinue) +# Check if existing llama-server matches current GPU mode. A CUDA-built binary +# on a now-CPU-only machine (or vice versa) needs to be rebuilt. +$NeedRebuild = $false if (Test-Path $LlamaServerBin) { + $CmakeCacheFile = Join-Path $BuildDir "CMakeCache.txt" + if (Test-Path $CmakeCacheFile) { + $cachedCuda = Select-String -Path $CmakeCacheFile -Pattern 'GGML_CUDA:BOOL=ON' -Quiet + if ($HasNvidiaSmi -and -not $cachedCuda) { + Write-Host " Existing llama-server is CPU-only but GPU is available -- rebuilding" -ForegroundColor Yellow + $NeedRebuild = $true + } elseif (-not $HasNvidiaSmi -and $cachedCuda) { + Write-Host " Existing llama-server was built with CUDA but no GPU detected -- rebuilding" -ForegroundColor Yellow + $NeedRebuild = $true + } + } +} + +if ((Test-Path $LlamaServerBin) -and -not $NeedRebuild) { Write-Host "" Write-Host "[OK] llama-server already exists at $LlamaServerBin" -ForegroundColor Green } elseif (-not $HasCmakeForBuild) { From 69d99a44fef4ed95262a90b74481e126410e3ffd Mon Sep 17 00:00:00 2001 From: Daniel Han Date: Wed, 18 Mar 2026 07:41:40 +0000 Subject: [PATCH 9/9] Fix nvidia-smi PATH persistence and cmake requirement for CPU-only 1. Store nvidia-smi as an absolute path ($NvidiaSmiExe) on first detection. 
All later calls (Get-CudaComputeCapability, Get-PytorchCudaTag, CUDA toolkit detection) use this absolute path instead of relying on PATH. This survives Refresh-Environment, which rebuilds PATH from the registry and drops process-only additions. 2. Make a missing cmake fatal for CPU-only installs. CPU-only machines depend entirely on llama-server for GGUF chat mode, so reporting "Setup Complete!" without it is misleading. GPU machines can still skip the llama-server build since they have other inference paths. --- studio/setup.ps1 | 45 ++++++++++++++++++++++++++++++++------------- 1 file changed, 32 insertions(+), 13 deletions(-) diff --git a/studio/setup.ps1 b/studio/setup.ps1 index c2203fc5ca..dac308966f 100644 --- a/studio/setup.ps1 +++ b/studio/setup.ps1 @@ -107,11 +107,15 @@ function Find-Nvcc { # Returns e.g. "80" for A100 (8.0), "89" for RTX 4090 (8.9), etc. # Returns $null if detection fails. function Get-CudaComputeCapability { - $nvSmi = Get-Command nvidia-smi -ErrorAction SilentlyContinue - if (-not $nvSmi) { return $null } + # Use the resolved absolute path ($NvidiaSmiExe) to survive Refresh-Environment + $smiExe = if ($script:NvidiaSmiExe) { $script:NvidiaSmiExe } else { + $cmd = Get-Command nvidia-smi -ErrorAction SilentlyContinue + if ($cmd) { $cmd.Source } else { $null } + } + if (-not $smiExe) { return $null } try { - $raw = & nvidia-smi --query-gpu=compute_cap --format=csv,noheader 2>$null + $raw = & $smiExe --query-gpu=compute_cap --format=csv,noheader 2>$null if ($LASTEXITCODE -ne 0 -or -not $raw) { return $null } # nvidia-smi may return multiple GPUs; take the first one @@ -168,14 +172,17 @@ function Get-NvccMaxArch { # https://download.pytorch.org/whl/. The tag must not exceed the driver's # capability: e.g. driver "CUDA Version: 12.9" → cu128 (not cu130).
function Get-PytorchCudaTag { - $nvSmi = Get-Command nvidia-smi -ErrorAction SilentlyContinue - if (-not $nvSmi) { return "cu124" } + $smiExe = if ($script:NvidiaSmiExe) { $script:NvidiaSmiExe } else { + $cmd = Get-Command nvidia-smi -ErrorAction SilentlyContinue + if ($cmd) { $cmd.Source } else { $null } + } + if (-not $smiExe) { return "cu124" } try { # 2>&1 | Out-String merges stderr into stdout then converts to a single - # string. Plain 2>$null doesn't fully suppress stderr in PS 5.1 — + # string. Plain 2>$null doesn't fully suppress stderr in PS 5.1 -- # ErrorRecord objects leak into $output and break the -match. - $output = & nvidia-smi 2>&1 | Out-String + $output = & $smiExe 2>&1 | Out-String if ($output -match 'CUDA Version:\s+(\d+)\.(\d+)') { $major = [int]$Matches[1] $minor = [int]$Matches[2] @@ -254,9 +261,16 @@ Write-Host "+==============================================+" -ForegroundColor G # 1a. GPU detection # ============================================ $HasNvidiaSmi = $false +$NvidiaSmiExe = $null # Absolute path -- survives Refresh-Environment try { - nvidia-smi 2>&1 | Out-Null - if ($LASTEXITCODE -eq 0) { $HasNvidiaSmi = $true } + $nvSmiCmd = Get-Command nvidia-smi -ErrorAction SilentlyContinue + if ($nvSmiCmd) { + & $nvSmiCmd.Source 2>&1 | Out-Null + if ($LASTEXITCODE -eq 0) { + $HasNvidiaSmi = $true + $NvidiaSmiExe = $nvSmiCmd.Source + } + } } catch {} # Fallback: nvidia-smi may not be on PATH even though a GPU + driver exist. # Check the default install location and the Windows driver store. 
@@ -270,10 +284,9 @@ if (-not $HasNvidiaSmi) { try { & $p 2>&1 | Out-Null if ($LASTEXITCODE -eq 0) { - $nvSmiDir = Split-Path $p -Parent - $env:Path = "$nvSmiDir;$env:Path" $HasNvidiaSmi = $true - Write-Host " Found nvidia-smi at $nvSmiDir (added to PATH)" -ForegroundColor Gray + $NvidiaSmiExe = $p + Write-Host " Found nvidia-smi at $(Split-Path $p -Parent)" -ForegroundColor Gray break } } catch {} @@ -443,7 +456,7 @@ if ($HasNvidiaSmi) { # -- Detect max CUDA version the driver supports -- $DriverMaxCuda = $null try { - $smiOut = nvidia-smi 2>&1 | Out-String + $smiOut = & $NvidiaSmiExe 2>&1 | Out-String if ($smiOut -match "CUDA Version:\s+([\d]+)\.([\d]+)") { $DriverMaxCuda = "$($Matches[1]).$($Matches[2])" Write-Host " Driver supports up to CUDA $DriverMaxCuda" -ForegroundColor Gray @@ -1086,6 +1099,12 @@ if ((Test-Path $LlamaServerBin) -and -not $NeedRebuild) { Write-Host "[OK] llama-server already exists at $LlamaServerBin" -ForegroundColor Green } elseif (-not $HasCmakeForBuild) { Write-Host "" + if (-not $HasNvidiaSmi) { + # CPU-only machines depend entirely on llama-server for GGUF chat -- cmake is required + Write-Host "[ERROR] CMake is required to build llama-server for GGUF chat mode." -ForegroundColor Red + Write-Host " Install CMake from https://cmake.org/download/ and re-run setup." -ForegroundColor Yellow + exit 1 + } Write-Host "[SKIP] llama-server build -- cmake not available" -ForegroundColor Yellow Write-Host " GGUF inference and export will not be available." -ForegroundColor Yellow Write-Host " Install CMake from https://cmake.org/download/ and re-run setup." -ForegroundColor Yellow