Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
81 changes: 17 additions & 64 deletions .github/workflows/build-llamacpp-rocm.yml
Original file line number Diff line number Diff line change
Expand Up @@ -9,9 +9,9 @@ on:
gfx_target:
description: 'AMD GPU targets (comma-separated)'
required: false
default: 'gfx1151,gfx120X,gfx110X'
default: 'gfx1151,gfx1150,gfx120X,gfx110X'
rocm_version:
description: 'ROCm version to use (e.g., 7.10.0a20251022) or "latest" to auto-detect'
description: 'ROCm version to use (e.g., 7.11.0a20251205) or "latest" to auto-detect'
required: false
default: 'latest'
llamacpp_version:
Expand All @@ -36,7 +36,7 @@ on:

env:
OPERATING_SYSTEMS: ${{ github.event.inputs.operating_systems || 'windows,ubuntu' }}
GFX_TARGETS: ${{ github.event.inputs.gfx_target || 'gfx1151,gfx120X,gfx110X' }}
GFX_TARGETS: ${{ github.event.inputs.gfx_target || 'gfx1151,gfx1150,gfx120X,gfx110X' }}
ROCM_VERSION: ${{ github.event.inputs.rocm_version || 'latest' }}
LLAMACPP_VERSION: ${{ github.event.inputs.llamacpp_version || 'latest' }}

Expand Down Expand Up @@ -241,31 +241,7 @@ jobs:

# Extract the tarball
tar -xzf rocm.tar.gz -C C:\opt\rocm --strip-components=1

#- name: Install rocWMMA headers (develop)
# run: |
# $repoUrl = "https://github.com/ROCm/rocWMMA.git"
# $cloneDir = Join-Path $env:TEMP "rocWMMA-develop"
# $destIncludeDir = "C:\opt\rocm\include"
# $destHeaders = Join-Path $destIncludeDir "rocwmma"
#
# if (Test-Path $cloneDir) { Remove-Item -Recurse -Force $cloneDir }
# Write-Host "Cloning rocWMMA develop branch..."
# git clone --depth 1 --single-branch --branch develop $repoUrl $cloneDir
#
# $srcHeaders = Join-Path $cloneDir "library\include\rocwmma"
# if (!(Test-Path $srcHeaders)) {
# Write-Error "rocWMMA headers not found at expected path: $srcHeaders"
# exit 1
# }
#
# Write-Host "Installing headers to $destHeaders ..."
# New-Item -ItemType Directory -Force -Path $destIncludeDir | Out-Null
# if (Test-Path $destHeaders) { Remove-Item -Recurse -Force $destHeaders }
# Copy-Item -Recurse -Force $srcHeaders $destHeaders
#
# Write-Host "rocWMMA headers (develop) installed successfully."


- name: Clone llama.cpp
run: |
$llamacppVersion = "${{ env.LLAMACPP_VERSION }}"
Expand All @@ -287,13 +263,6 @@ jobs:
Write-Host "Current llama.cpp commit:"
git log --oneline -1

# - name: Apply rocWMMA patch (non-interactive)
# shell: bash
# run: |
# chmod +x utils/rocwmma_patch.sh
# # Run non-interactively; keep backups (answer 'n')
# printf "n\n" | ./utils/rocwmma_patch.sh ./llama.cpp

- name: Build Llama.cpp + ROCm
shell: cmd
run: |
Expand All @@ -306,6 +275,8 @@ jobs:
set "mapped_target=gfx1100;gfx1101;gfx1102"
) else if "%current_target%"=="gfx1151" (
set "mapped_target=gfx1151"
) else if "%current_target%"=="gfx1150" (
set "mapped_target=gfx1150"
) else if "%current_target%"=="gfx120X" (
set "mapped_target=gfx1200;gfx1201"
) else (
Expand All @@ -332,15 +303,16 @@ jobs:
cmake .. -G Ninja ^
-DCMAKE_C_COMPILER="C:\opt\rocm\lib\llvm\bin\clang.exe" ^
-DCMAKE_CXX_COMPILER="C:\opt\rocm\lib\llvm\bin\clang++.exe" ^
-DCMAKE_CXX_FLAGS="-IC:\opt\rocm\include" ^
-DCMAKE_CROSSCOMPILING=ON ^
-DCMAKE_BUILD_TYPE=Release ^
-DAMDGPU_TARGETS="%mapped_target%" ^
-DGPU_TARGETS="%mapped_target%" ^
-DBUILD_SHARED_LIBS=ON ^
-DLLAMA_BUILD_TESTS=OFF ^
-DGGML_HIP=ON ^
-DGGML_OPENMP=OFF ^
-DGGML_CUDA_FORCE_CUBLAS=OFF ^
-DGGML_HIP_ROCWMMA_FATTN=OFF ^
-DGGML_HIP_ROCWMMA_FATTN=ON ^
-DLLAMA_CURL=OFF ^
-DGGML_NATIVE=OFF ^
-DGGML_STATIC=OFF ^
Expand Down Expand Up @@ -687,24 +659,7 @@ jobs:
echo "PKG_CONFIG_PATH=/opt/rocm/lib/pkgconfig:${PKG_CONFIG_PATH:-}" >> $GITHUB_ENV

echo "ROCm environment variables set successfully"

#- name: Install rocWMMA headers (develop)
# run: |
# set -e
# TMP_DIR="$(mktemp -d)"
# echo "Using temp directory: $TMP_DIR"
# git clone --depth 1 --single-branch --branch develop https://github.com/ROCm/rocWMMA.git "$TMP_DIR/rocWMMA"
# SRC_DIR="$TMP_DIR/rocWMMA/library/include/rocwmma"
# if [ ! -d "$SRC_DIR" ]; then
# echo "rocWMMA headers not found at expected path: $SRC_DIR"
# exit 1
# fi
# echo "Installing headers to /opt/rocm/include/rocwmma ..."
# sudo mkdir -p /opt/rocm/include
# sudo rm -rf /opt/rocm/include/rocwmma
# sudo cp -r "$SRC_DIR" /opt/rocm/include/
# rm -rf "$TMP_DIR"
# echo "rocWMMA headers (develop) installed successfully."


- name: Clone llama.cpp
run: |
Expand All @@ -727,11 +682,6 @@ jobs:
echo "Current llama.cpp commit:"
git log --oneline -1

#- name: Apply rocWMMA patch (non-interactive)
# run: |
# chmod +x utils/rocwmma_patch.sh
# # Run non-interactively; keep backups (answer 'n')
# printf "n\n" | ./utils/rocwmma_patch.sh ./llama.cpp

- name: Build Llama.cpp + ROCm
run: |
Expand All @@ -743,6 +693,8 @@ jobs:
mapped_target="gfx1100;gfx1101;gfx1102"
elif [ "$current_target" = "gfx1151" ]; then
mapped_target="gfx1151"
elif [ "$current_target" = "gfx1150" ]; then
mapped_target="gfx1150"
elif [ "$current_target" = "gfx120X" ]; then
mapped_target="gfx1200;gfx1201"
else
Expand All @@ -759,15 +711,16 @@ jobs:
cmake .. -G Ninja \
-DCMAKE_C_COMPILER=/opt/rocm/llvm/bin/clang \
-DCMAKE_CXX_COMPILER=/opt/rocm/llvm/bin/clang++ \
-DCMAKE_CXX_FLAGS="-I/opt/rocm/include" \
-DCMAKE_CROSSCOMPILING=ON \
-DCMAKE_BUILD_TYPE=Release \
-DAMDGPU_TARGETS="$mapped_target" \
-DGPU_TARGETS="$mapped_target" \
-DBUILD_SHARED_LIBS=ON \
-DLLAMA_BUILD_TESTS=OFF \
-DGGML_HIP=ON \
-DGGML_OPENMP=OFF \
-DGGML_CUDA_FORCE_CUBLAS=OFF \
-DGGML_HIP_ROCWMMA_FATTN=OFF \
-DGGML_HIP_ROCWMMA_FATTN=ON \
-DLLAMA_CURL=OFF \
-DGGML_NATIVE=OFF \
-DGGML_STATIC=OFF \
Expand Down Expand Up @@ -931,7 +884,7 @@ jobs:
needs: [prepare-matrix, build-windows]
if: |
needs.build-windows.result == 'success' &&
contains(github.event.inputs.gfx_target || 'gfx1151,gfx120X,gfx110X', 'gfx1151') &&
contains(github.event.inputs.gfx_target || 'gfx1151,gfx1150,gfx120X,gfx110X', 'gfx1151') &&
needs.prepare-matrix.outputs.should_build_windows == 'true'

steps:
Expand Down Expand Up @@ -1053,7 +1006,7 @@ jobs:
needs: [prepare-matrix, build-ubuntu]
if: |
needs.build-ubuntu.result == 'success' &&
contains(github.event.inputs.gfx_target || 'gfx1151,gfx120X,gfx110X', 'gfx1151') &&
contains(github.event.inputs.gfx_target || 'gfx1151,gfx1150,gfx120X,gfx110X', 'gfx1151') &&
needs.prepare-matrix.outputs.should_build_ubuntu == 'true'

steps:
Expand Down
6 changes: 4 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,8 @@ We provide nightly builds of **llama.cpp** with **AMD ROCm™ 7** acceleration b
## 🎯 Supported Devices

This build specifically targets the following GPU architectures:
- **gfx1151** (STX Halo GPUs) - Ryzen AI MAX+ Pro 395
- **gfx1151** (STX Halo APU) - Ryzen AI MAX+ Pro 395
- **gfx1150** (STX Point APU) - Ryzen AI 300
- **gfx120X** (RDNA4 GPUs) - includes AMD Radeon RX 9070 XT/GRE/9070, RX 9060 XT/9060
- **gfx110X** (RDNA3 GPUs) - includes AMD Radeon PRO W7900/W7800/W7700/W7600, RX 7900 XTX/XT/GRE, RX 7800 XT, RX 7700 XT/7700, RX 7600 XT/7600

Expand All @@ -41,13 +42,14 @@ This build specifically targets the following GPU architectures:

Our automated GitHub Actions workflow creates nightly builds for:
- **Windows** and **Ubuntu** operating systems
- **Multiple GPU targets**: `gfx1151`, `gfx120X`, `gfx110X`
- **Multiple GPU targets**: `gfx1151`, `gfx1150`, `gfx110X`, `gfx120X`
- **ROCm™ 7 built-in** - complete runtime libraries included


| GPU Target | Ubuntu | Windows |
|-------------|--------|---------|
| **gfx110X** | [![Download Ubuntu gfx110X](https://img.shields.io/badge/Download-Ubuntu%20gfx110X-blue)](https://github.com/aigdat/llamacpp-rocm/releases/latest) | [![Download Windows gfx110X](https://img.shields.io/badge/Download-Windows%20gfx110X-green)](https://github.com/aigdat/llamacpp-rocm/releases/latest) |
| **gfx1150** | [![Download Ubuntu gfx1150](https://img.shields.io/badge/Download-Ubuntu%20gfx1150-blue)](https://github.com/aigdat/llamacpp-rocm/releases/latest) | [![Download Windows gfx1150](https://img.shields.io/badge/Download-Windows%20gfx1150-green)](https://github.com/aigdat/llamacpp-rocm/releases/latest) |
| **gfx1151** | [![Download Ubuntu gfx1151](https://img.shields.io/badge/Download-Ubuntu%20gfx1151-blue)](https://github.com/aigdat/llamacpp-rocm/releases/latest) | [![Download Windows gfx1151](https://img.shields.io/badge/Download-Windows%20gfx1151-green)](https://github.com/aigdat/llamacpp-rocm/releases/latest) |
| **gfx120X** | [![Download Ubuntu gfx120X](https://img.shields.io/badge/Download-Ubuntu%20gfx120X-blue)](https://github.com/aigdat/llamacpp-rocm/releases/latest) | [![Download Windows gfx120X](https://img.shields.io/badge/Download-Windows%20gfx120X-green)](https://github.com/aigdat/llamacpp-rocm/releases/latest) |

Expand Down
44 changes: 23 additions & 21 deletions docs/manual_instructions.md
Original file line number Diff line number Diff line change
Expand Up @@ -31,16 +31,13 @@ I used chocolatey, but you can also install those manually.

### Part 2: Organizing artifacts
* Step 1: Get the latest run id from main [here](https://github.com/ROCm/TheRock/actions/workflows/release_windows_packages.yml).
* Example: [TheRock/actions/runs/16218534118/job/45793425858](https://github.com/ROCm/TheRock/actions/runs/16218534118/job/45793425858)
* Example: [TheRock/actions/runs/19952310972/job/57214748637](https://github.com/ROCm/TheRock/actions/runs/19952310972/job/57214748637)
* Step 2: Look at the upload logs for your target GPU (e.g., `gfx1151`), and note the Windows URL:
```
://therock-nightly-tarball/therock-dist-windows-gfx1151-7.11.0a20251205.tar.gz
```
* Step 4: Download the nightly tarball
* Example: `therock-nightly-tarball.s3.amazonaws.com/YOUR_FILE`
* Step 5: Extract the contents of this tar.gz file to `C:\opt\rocm`
* Setp 6: Add `C:\opt\rocm\lib\llvm\bin` to path
* Step 7: clone llamacpp
* Step 3: Download the nightly tarball for Windows
* Example: [therock-nightly-tarball.s3.amazonaws.com/YOUR_WINDOWS_FILE](https://therock-nightly-tarball.s3.amazonaws.com/therock-dist-windows-gfx1151-7.11.0a20251205.tar.gz)
* Step 4: Extract the contents of this tar.gz file to `C:\opt\rocm`
* Step 5: Add `C:\opt\rocm\lib\llvm\bin` to path
* Step 6: Clone llama.cpp

### Part 3: Building Llama.cpp + ROCm

Expand All @@ -53,11 +50,11 @@ set HIP_PLATFORM=amd
cd "C:\<YOUR_LLAMACPP_PATH>\llama.cpp"
mkdir build
cd build
cmake .. -G Ninja -DCMAKE_C_COMPILER="C:\opt\rocm\lib\llvm\bin\clang.exe" -DCMAKE_CXX_COMPILER="C:\opt\rocm\lib\llvm\bin\clang++.exe" -DCMAKE_CROSSCOMPILING=ON -DCMAKE_BUILD_TYPE=Release -DAMDGPU_TARGETS="gfx1151" -DBUILD_SHARED_LIBS=ON -DLLAMA_BUILD_TESTS=OFF -DGGML_HIP=ON -DGGML_OPENMP=OFF -DGGML_CUDA_FORCE_CUBLAS=OFF -DGGML_HIP_ROCWMMA_FATTN=ON -DGGML_HIP_FORCE_ROCWMMA_FATTN_GFX12=OFF -DLLAMA_CURL=OFF -DGGML_NATIVE=OFF -DGGML_STATIC=OFF -DCMAKE_SYSTEM_NAME=Windows
cmake .. -G Ninja -DCMAKE_C_COMPILER="C:\opt\rocm\lib\llvm\bin\clang.exe" -DCMAKE_CXX_COMPILER="C:\opt\rocm\lib\llvm\bin\clang++.exe" -DCMAKE_CXX_FLAGS="-IC:\opt\rocm\include" -DCMAKE_CROSSCOMPILING=ON -DCMAKE_BUILD_TYPE=Release -DGPU_TARGETS="gfx1151" -DBUILD_SHARED_LIBS=ON -DLLAMA_BUILD_TESTS=OFF -DGGML_HIP=ON -DGGML_OPENMP=OFF -DGGML_CUDA_FORCE_CUBLAS=OFF -DGGML_HIP_ROCWMMA_FATTN=ON -DLLAMA_CURL=OFF -DGGML_NATIVE=OFF -DGGML_STATIC=OFF -DCMAKE_SYSTEM_NAME=Windows
cmake --build . -j 24 2>&1 | findstr /i "error"
```

> **Note**: Adjust the `-DAMDGPU_TARGETS="gfx1151"` parameter for your specific GPU. See the [GPU Target Reference](#gpu-target-reference) section for details.
> **Note**: Adjust the `-DGPU_TARGETS="gfx1151"` parameter for your specific GPU. See the [GPU Target Reference](#gpu-target-reference) section for details.

If you see no errors, that means that llama.cpp has correctly been built and files are available inside your `build\bin` folder.

Expand All @@ -82,14 +79,14 @@ sudo apt install -y cmake ninja-build git wget
* Step 1: Get the latest run id from main [here](https://github.com/ROCm/TheRock/actions/workflows/release_windows_packages.yml) (same as Windows).
* Step 2: Look at the upload logs for your target GPU (e.g., `gfx1151`), but note the Linux URLs:
```
://therock-nightly-tarball/therock-dist-linux-gfx1151-7.0.0rc20250711.tar.gz
://therock-nightly-tarball/therock-dist-linux-gfx1151-7.11.0a20251205.tar.gz
```
* Step 3: Download the nightly tarball for Linux
* Example: [therock-nightly-tarball.s3.amazonaws.com/YOUR_LINUX_FILE](https://therock-nightly-tarball.s3.amazonaws.com/therock-dist-linux-gfx1151-7.0.0rc20250711.tar.gz)
* Example: [therock-nightly-tarball.s3.amazonaws.com/YOUR_LINUX_FILE](https://therock-nightly-tarball.s3.amazonaws.com/therock-dist-linux-gfx1151-7.11.0a20251205.tar.gz)
* Step 4: Extract the contents of this tar.gz file to `/opt/rocm`:
```bash
sudo mkdir -p /opt/rocm
sudo tar -xzf therock-dist-linux-gfx1151-7.0.0rc20250711.tar.gz -C /opt/rocm --strip-components=1
sudo tar -xzf therock-dist-linux-gfx1151-7.11.0a20251205.tar.gz -C /opt/rocm --strip-components=1
```
* Step 5: Set up ROCm environment variables:
```bash
Expand Down Expand Up @@ -127,9 +124,10 @@ cd build
cmake .. -G Ninja \
-DCMAKE_C_COMPILER=/opt/rocm/llvm/bin/clang \
-DCMAKE_CXX_COMPILER=/opt/rocm/llvm/bin/clang++ \
-DCMAKE_CXX_FLAGS="-I/opt/rocm/include" \
-DCMAKE_CROSSCOMPILING=ON \
-DCMAKE_BUILD_TYPE=Release \
-DAMDGPU_TARGETS="gfx1151" \
-DGPU_TARGETS="gfx1151" \
-DBUILD_SHARED_LIBS=ON \
-DLLAMA_BUILD_TESTS=OFF \
-DGGML_HIP=ON \
Expand All @@ -145,7 +143,7 @@ cmake .. -G Ninja \
cmake --build . -j $(nproc)
```

> **Note**: Adjust the `-DAMDGPU_TARGETS="gfx1151"` parameter for your specific GPU. See the [GPU Target Reference](#gpu-target-reference) section for details.
> **Note**: Adjust the `-DGPU_TARGETS="gfx1151"` parameter for your specific GPU. See the [GPU Target Reference](#gpu-target-reference) section for details.

### Part 4: Copy required ROCm libraries

Expand Down Expand Up @@ -179,25 +177,29 @@ If you see no errors during the build process, llama.cpp has been successfully c

## 🎯 GPU Target Reference

When building llama.cpp with ROCm, the `-DAMDGPU_TARGETS` parameter must be set based on your specific GPU architecture. Our automated workflow uses generic targets that get mapped to specific architectures:
When building llama.cpp with ROCm, the `-DGPU_TARGETS` parameter must be set based on your specific GPU architecture. Our automated workflow uses generic targets that get mapped to specific architectures:

- **`gfx120X`** maps to `gfx1200, gfx1201` (RDNA4 series like: RX 9070 XT/GRE/9070, RX 9060 XT/9060)
- **`gfx110X`** maps to `gfx1100, gfx1101, gfx1102` (RDNA3 series like: PRO W7900/W7800/W7700/W7600, RX 7900 XTX/XT/GRE, RX 7800 XT, RX 7700 XT/7700, RX 7600 XT/7600)
- **`gfx1150`** remains as `gfx1150` (Strix Point)
- **`gfx1151`** remains as `gfx1151` (Strix Halo)

For a complete list of GPU targets and their mappings, see the [automated workflow](../.github/workflows/build-llamacpp-rocm.yml).

### How to Use

Replace the `-DAMDGPU_TARGETS="gfx1151"` parameter in your cmake command with the appropriate target for your GPU:
Replace the `-DGPU_TARGETS="gfx1151"` parameter in your cmake command with the appropriate target for your GPU:

```bash
# For RDNA4 series (RX 9070 XT/GRE/9070, RX 9060 XT/9060)
-DAMDGPU_TARGETS="gfx1200, gfx1201"
-DGPU_TARGETS="gfx1200;gfx1201"

# For RDNA3 series (PRO W7900/W7800/W7700/W7600, RX 7900 XTX/XT/GRE, RX 7800 XT, RX 7700 XT/7700, RX 7600 XT/7600)
-DAMDGPU_TARGETS="gfx1100"
-DGPU_TARGETS="gfx1100;gfx1101;gfx1102"

# For Strix Point
-DGPU_TARGETS="gfx1150"

# For Strix Halo
-DAMDGPU_TARGETS="gfx1151"
-DGPU_TARGETS="gfx1151"
```
Loading