From 20a471766284f72b6dc2c06a208aa03dc73edad1 Mon Sep 17 00:00:00 2001
From: Daniel Han <danielhanchen@gmail.com>
Date: Wed, 25 Mar 2026 10:25:18 +0000
Subject: [PATCH] Add ROCm (AMD GPU) support to studio setup

Add GPU backend detection for ROCm alongside existing CUDA support in
the llama.cpp build step. When hipcc is found and no CUDA toolchain is
present, the build uses GGML_HIP=ON with proper ROCm root resolution
via readlink/hipconfig, sets ROCM_PATH and HIP_PATH, uses the
upstream-recommended HIPCXX compiler path, and auto-detects the gfx
architecture via rocminfo.

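CUDA toolchain detection (the nvcc lookup) is unchanged and takes priority
on mixed-toolchain hosts. The CPU-only fallback now tests for nvidia-smi
with `command -v` instead of executing it, and a matching hint is printed
when /opt/rocm or rocm-smi is present but hipcc is not.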
---
 studio/setup.sh | 75 ++++++++++++++++++++++++++++++++++++++++++++++---
 1 file changed, 71 insertions(+), 4 deletions(-)

diff --git a/studio/setup.sh b/studio/setup.sh
index 851fcadc81..5b57cfb922 100755
--- a/studio/setup.sh
+++ b/studio/setup.sh
@@ -449,20 +449,43 @@ rm -rf "$LLAMA_CPP_DIR"
                 echo "   Using ccache for faster compilation"
             fi
 
-            # Detect CUDA: check nvcc on PATH, then common install locations
+            # Detect GPU backend: "cuda" (NVIDIA), "rocm" (AMD), or empty when neither nvcc nor hipcc is found
+            GPU_BACKEND=""
+
+            # CUDA: look for nvcc on PATH, then /usr/local/cuda, then the newest /usr/local/cuda-* install
             NVCC_PATH=""
             if command -v nvcc &>/dev/null; then
                 NVCC_PATH="$(command -v nvcc)"
+                GPU_BACKEND="cuda"
             elif [ -x /usr/local/cuda/bin/nvcc ]; then
                 NVCC_PATH="/usr/local/cuda/bin/nvcc"
                 export PATH="/usr/local/cuda/bin:$PATH"
+                GPU_BACKEND="cuda"
             elif ls /usr/local/cuda-*/bin/nvcc &>/dev/null 2>&1; then
                 # Pick the newest cuda-XX.X directory
                 NVCC_PATH="$(ls -d /usr/local/cuda-*/bin/nvcc 2>/dev/null | sort -V | tail -1)"
                 export PATH="$(dirname "$NVCC_PATH"):$PATH"
+                GPU_BACKEND="cuda"
+            fi
+
+            # Check for ROCm (AMD) only if CUDA was not already selected: hipcc on PATH, then /opt/rocm, then the newest /opt/rocm-*
+            ROCM_HIPCC=""
+            if [ -z "$GPU_BACKEND" ]; then
+                if command -v hipcc &>/dev/null; then
+                    ROCM_HIPCC="$(command -v hipcc)"
+                    GPU_BACKEND="rocm"
+                elif [ -x /opt/rocm/bin/hipcc ]; then
+                    ROCM_HIPCC="/opt/rocm/bin/hipcc"
+                    export PATH="/opt/rocm/bin:$PATH"
+                    GPU_BACKEND="rocm"
+                elif ls /opt/rocm-*/bin/hipcc &>/dev/null 2>&1; then
+                    ROCM_HIPCC="$(ls -d /opt/rocm-*/bin/hipcc 2>/dev/null | sort -V | tail -1)"  # highest version per sort -V
+                    export PATH="$(dirname "$ROCM_HIPCC"):$PATH"
+                    GPU_BACKEND="rocm"
+                fi
             fi
 
-            if [ -n "$NVCC_PATH" ]; then
+            if [ "$GPU_BACKEND" = "cuda" ]; then
                 echo "   Building with CUDA support (nvcc: $NVCC_PATH)..."
                 CMAKE_ARGS="$CMAKE_ARGS -DGGML_CUDA=ON"
 
@@ -494,11 +517,55 @@ rm -rf "$LLAMA_CPP_DIR"
 
                 # Multi-threaded nvcc compilation (uses all CPU cores per .cu file)
                 CMAKE_ARGS="$CMAKE_ARGS -DCMAKE_CUDA_FLAGS=--threads=0"
-            elif [ -d /usr/local/cuda ] || nvidia-smi &>/dev/null; then
+            elif [ "$GPU_BACKEND" = "rocm" ]; then
+                # Resolve hipcc symlinks; the ROCm root is taken from `hipconfig -R`, else from the parent of hipcc's directory
+                _HIPCC_REAL="$(readlink -f "$ROCM_HIPCC" 2>/dev/null || printf '%s' "$ROCM_HIPCC")"
+                ROCM_ROOT=""
+                if command -v hipconfig &>/dev/null; then
+                    ROCM_ROOT="$(hipconfig -R 2>/dev/null || true)"
+                fi
+                if [ -z "$ROCM_ROOT" ]; then  # hipconfig missing, failed, or printed nothing
+                    ROCM_ROOT="$(cd "$(dirname "$_HIPCC_REAL")/.." 2>/dev/null && pwd)"
+                fi
+
+                echo "   Building with ROCm support (AMD GPU, hipcc: $_HIPCC_REAL)..."
+                CMAKE_ARGS="$CMAKE_ARGS -DGGML_HIP=ON"
+                export ROCM_PATH="$ROCM_ROOT"  # ROCM_PATH and HIP_PATH both point at the resolved root
+                export HIP_PATH="$ROCM_ROOT"
+
+                # Use upstream-recommended HIP compiler (not legacy hipcc-as-CXX): clang inside the directory `hipconfig -l` prints
+                if command -v hipconfig &>/dev/null; then
+                    _HIP_CLANG_DIR="$(hipconfig -l 2>/dev/null || true)"
+                    [ -n "$_HIP_CLANG_DIR" ] && export HIPCXX="$_HIP_CLANG_DIR/clang"  # HIPCXX is left untouched when nothing was printed
+                fi
+
+                # Detect AMD GPU architectures (gfx targets) from rocminfo. Accepts the 3-4 digit
+                # names (gfx906, gfx1100) and the two-digit + letter form (gfx90a, gfx90c);
+                # generic ISA names such as gfx11-generic deliberately do not match.
+                GPU_TARGETS=""
+                if command -v rocminfo &>/dev/null; then
+                    _gfx_list=$(rocminfo 2>/dev/null | grep -oE 'gfx([0-9]{3,4}[a-z]?|[0-9]{2}[a-z])' | sort -u || true)
+                    for _gfx in $_gfx_list; do  # unquoted on purpose: one arch name per word
+                        if [[ "$_gfx" =~ ^gfx([0-9]{3,4}[a-z]?|[0-9]{2}[a-z])$ ]]; then
+                            GPU_TARGETS="${GPU_TARGETS}${GPU_TARGETS:+;}$_gfx"  # ';'-joined list for cmake
+                        fi
+                    done
+                fi
+
+                if [ -n "$GPU_TARGETS" ]; then
+                    echo "   AMD GPU architectures: ${GPU_TARGETS//;/, } -- limiting build to detected targets"
+                    CMAKE_ARGS="$CMAKE_ARGS -DGPU_TARGETS=${GPU_TARGETS}"
+                else
+                    echo "   Could not detect AMD GPU arch -- building for default targets (cmake will auto-detect)"
+                fi
+            elif [ -d /usr/local/cuda ] || command -v nvidia-smi &>/dev/null; then  # presence check only; nvidia-smi is not executed
                 echo "   CUDA driver detected but nvcc not found — building CPU-only"
                 echo "   To enable GPU: install cuda-toolkit or add nvcc to PATH"
+            elif [ -d /opt/rocm ] || command -v rocm-smi &>/dev/null; then  # ROCm counterpart: install dir or rocm-smi exists
+                echo "   ROCm driver detected but hipcc not found — building CPU-only"
+                echo "   To enable GPU: install rocm-dev or add hipcc to PATH"
             else
-                echo "   Building CPU-only (no CUDA detected)..."
+                echo "   Building CPU-only (no CUDA or ROCm detected)..."
             fi
 
             NCPU=$(nproc 2>/dev/null || sysctl -n hw.ncpu 2>/dev/null || echo 4)