lemonade-sdk · danielholanda · Dec 8, 2025 · Dec 5, 2025 · Dec 5, 2025 · Dec 5, 2025
diff --git a/.github/workflows/build-llamacpp-rocm.yml b/.github/workflows/build-llamacpp-rocm.yml
@@ -9,9 +9,9 @@ on:
       gfx_target:
         description: 'AMD GPU targets (comma-separated)'
         required: false
-        default: 'gfx1151,gfx120X,gfx110X'
+        default: 'gfx1151,gfx1150,gfx120X,gfx110X'
       rocm_version:
-        description: 'ROCm version to use (e.g., 7.10.0a20251022) or "latest" to auto-detect'
+        description: 'ROCm version to use (e.g., 7.11.0a20251205) or "latest" to auto-detect'
         required: false
         default: 'latest'
       llamacpp_version:
@@ -36,7 +36,7 @@ on:
 
 env:
   OPERATING_SYSTEMS: ${{ github.event.inputs.operating_systems || 'windows,ubuntu' }}
-  GFX_TARGETS: ${{ github.event.inputs.gfx_target || 'gfx1151,gfx120X,gfx110X' }}
+  GFX_TARGETS: ${{ github.event.inputs.gfx_target || 'gfx1151,gfx1150,gfx120X,gfx110X' }}
   ROCM_VERSION: ${{ github.event.inputs.rocm_version || 'latest' }}
   LLAMACPP_VERSION: ${{ github.event.inputs.llamacpp_version || 'latest' }}
 
@@ -241,31 +241,7 @@ jobs:
 
         # Extract the tarball
         tar -xzf rocm.tar.gz -C C:\opt\rocm --strip-components=1
-
-    #- name: Install rocWMMA headers (develop)
-    #  run: |
-    #    $repoUrl = "https://github.com/ROCm/rocWMMA.git"
-    #    $cloneDir = Join-Path $env:TEMP "rocWMMA-develop"
-    #    $destIncludeDir = "C:\opt\rocm\include"
-    #    $destHeaders = Join-Path $destIncludeDir "rocwmma"
-    #    
-    #    if (Test-Path $cloneDir) { Remove-Item -Recurse -Force $cloneDir }
-    #    Write-Host "Cloning rocWMMA develop branch..."
-    #    git clone --depth 1 --single-branch --branch develop $repoUrl $cloneDir
-    #    
-    #    $srcHeaders = Join-Path $cloneDir "library\include\rocwmma"
-    #    if (!(Test-Path $srcHeaders)) {
-    #      Write-Error "rocWMMA headers not found at expected path: $srcHeaders"
-    #      exit 1
-    #    }
-    #    
-    #    Write-Host "Installing headers to $destHeaders ..."
-    #    New-Item -ItemType Directory -Force -Path $destIncludeDir | Out-Null
-    #    if (Test-Path $destHeaders) { Remove-Item -Recurse -Force $destHeaders }
-    #    Copy-Item -Recurse -Force $srcHeaders $destHeaders
-    #    
-    #    Write-Host "rocWMMA headers (develop) installed successfully."
-
+
     - name: Clone llama.cpp
       run: |
         $llamacppVersion = "${{ env.LLAMACPP_VERSION }}"
@@ -287,13 +263,6 @@ jobs:
         Write-Host "Current llama.cpp commit:"
         git log --oneline -1
 
-    # - name: Apply rocWMMA patch (non-interactive)
-    #   shell: bash
-    #   run: |
-    #     chmod +x utils/rocwmma_patch.sh
-    #     # Run non-interactively; keep backups (answer 'n')
-    #     printf "n\n" | ./utils/rocwmma_patch.sh ./llama.cpp
-
     - name: Build Llama.cpp + ROCm
       shell: cmd
       run: |
@@ -306,6 +275,8 @@ jobs:
           set "mapped_target=gfx1100;gfx1101;gfx1102"
         ) else if "%current_target%"=="gfx1151" (
           set "mapped_target=gfx1151"
+        ) else if "%current_target%"=="gfx1150" (
+          set "mapped_target=gfx1150"
         ) else if "%current_target%"=="gfx120X" (
           set "mapped_target=gfx1200;gfx1201"
         ) else (
@@ -332,15 +303,16 @@ jobs:
         cmake .. -G Ninja ^
           -DCMAKE_C_COMPILER="C:\opt\rocm\lib\llvm\bin\clang.exe" ^
           -DCMAKE_CXX_COMPILER="C:\opt\rocm\lib\llvm\bin\clang++.exe" ^
+          -DCMAKE_CXX_FLAGS="-IC:\opt\rocm\include" ^
           -DCMAKE_CROSSCOMPILING=ON ^
           -DCMAKE_BUILD_TYPE=Release ^
-          -DAMDGPU_TARGETS="%mapped_target%" ^
+          -DGPU_TARGETS="%mapped_target%" ^
           -DBUILD_SHARED_LIBS=ON ^
           -DLLAMA_BUILD_TESTS=OFF ^
           -DGGML_HIP=ON ^
           -DGGML_OPENMP=OFF ^
           -DGGML_CUDA_FORCE_CUBLAS=OFF ^
-          -DGGML_HIP_ROCWMMA_FATTN=OFF ^
+          -DGGML_HIP_ROCWMMA_FATTN=ON ^
           -DLLAMA_CURL=OFF ^
           -DGGML_NATIVE=OFF ^
           -DGGML_STATIC=OFF ^
@@ -687,24 +659,7 @@ jobs:
         echo "PKG_CONFIG_PATH=/opt/rocm/lib/pkgconfig:${PKG_CONFIG_PATH:-}" >> $GITHUB_ENV
 
         echo "ROCm environment variables set successfully"
-
-    #- name: Install rocWMMA headers (develop)
-    #  run: |
-    #    set -e
-    #    TMP_DIR="$(mktemp -d)"
-    #    echo "Using temp directory: $TMP_DIR"
-    #    git clone --depth 1 --single-branch --branch develop https://github.com/ROCm/rocWMMA.git "$TMP_DIR/rocWMMA"
-    #    SRC_DIR="$TMP_DIR/rocWMMA/library/include/rocwmma"
-    #    if [ ! -d "$SRC_DIR" ]; then
-    #      echo "rocWMMA headers not found at expected path: $SRC_DIR"
-    #      exit 1
-    #    fi
-    #    echo "Installing headers to /opt/rocm/include/rocwmma ..."
-    #    sudo mkdir -p /opt/rocm/include
-    #    sudo rm -rf /opt/rocm/include/rocwmma
-    #    sudo cp -r "$SRC_DIR" /opt/rocm/include/
-    #    rm -rf "$TMP_DIR"
-    #    echo "rocWMMA headers (develop) installed successfully."
+
 
     - name: Clone llama.cpp
       run: |
@@ -727,11 +682,6 @@ jobs:
         echo "Current llama.cpp commit:"
         git log --oneline -1
 
-    #- name: Apply rocWMMA patch (non-interactive)
-    #  run: |
-    #    chmod +x utils/rocwmma_patch.sh
-    #    # Run non-interactively; keep backups (answer 'n')
-    #    printf "n\n" | ./utils/rocwmma_patch.sh ./llama.cpp
 
     - name: Build Llama.cpp + ROCm
       run: |
@@ -743,6 +693,8 @@ jobs:
           mapped_target="gfx1100;gfx1101;gfx1102"
         elif [ "$current_target" = "gfx1151" ]; then
           mapped_target="gfx1151"
+        elif [ "$current_target" = "gfx1150" ]; then
+          mapped_target="gfx1150"
         elif [ "$current_target" = "gfx120X" ]; then
           mapped_target="gfx1200;gfx1201"
         else
@@ -759,15 +711,16 @@ jobs:
         cmake .. -G Ninja \
           -DCMAKE_C_COMPILER=/opt/rocm/llvm/bin/clang \
           -DCMAKE_CXX_COMPILER=/opt/rocm/llvm/bin/clang++ \
+          -DCMAKE_CXX_FLAGS="-I/opt/rocm/include" \
           -DCMAKE_CROSSCOMPILING=ON \
           -DCMAKE_BUILD_TYPE=Release \
-          -DAMDGPU_TARGETS="$mapped_target" \
+          -DGPU_TARGETS="$mapped_target" \
           -DBUILD_SHARED_LIBS=ON \
           -DLLAMA_BUILD_TESTS=OFF \
           -DGGML_HIP=ON \
           -DGGML_OPENMP=OFF \
           -DGGML_CUDA_FORCE_CUBLAS=OFF \
-          -DGGML_HIP_ROCWMMA_FATTN=OFF \
+          -DGGML_HIP_ROCWMMA_FATTN=ON \
           -DLLAMA_CURL=OFF \
           -DGGML_NATIVE=OFF \
           -DGGML_STATIC=OFF \
@@ -931,7 +884,7 @@ jobs:
     needs: [prepare-matrix, build-windows]
     if: |
       needs.build-windows.result == 'success' &&
-      contains(github.event.inputs.gfx_target || 'gfx1151,gfx120X,gfx110X', 'gfx1151') &&
+      contains(github.event.inputs.gfx_target || 'gfx1151,gfx1150,gfx120X,gfx110X', 'gfx1151') &&
       needs.prepare-matrix.outputs.should_build_windows == 'true'
 
     steps:
@@ -1053,7 +1006,7 @@ jobs:
     needs: [prepare-matrix, build-ubuntu]
     if: |
       needs.build-ubuntu.result == 'success' &&
-      contains(github.event.inputs.gfx_target || 'gfx1151,gfx120X,gfx110X', 'gfx1151') &&
+      contains(github.event.inputs.gfx_target || 'gfx1151,gfx1150,gfx120X,gfx110X', 'gfx1151') &&
       needs.prepare-matrix.outputs.should_build_ubuntu == 'true'
 
     steps:

diff --git a/README.md b/README.md
@@ -31,7 +31,8 @@ We provide nightly builds of **llama.cpp** with **AMD ROCm™ 7** acceleration b
 ## 🎯 Supported Devices
 
 This build specifically targets the following GPU architectures:
-- **gfx1151** (STX Halo GPUs) - Ryzen AI MAX+ Pro 395
+- **gfx1151** (STX Halo APU) - Ryzen AI MAX+ Pro 395
+- **gfx1150** (STX Point APU) - Ryzen AI 300
 - **gfx120X** (RDNA4 GPUs) - includes AMD Radeon RX 9070 XT/GRE/9070, RX 9060 XT/9060
 - **gfx110X** (RDNA3 GPUs) - includes AMD Radeon PRO W7900/W7800/W7700/W7600, RX 7900 XTX/XT/GRE, RX 7800 XT, RX 7700 XT/7700, RX 7600 XT/7600
 
@@ -41,13 +42,14 @@ This build specifically targets the following GPU architectures:
 
 Our automated GitHub Actions workflow creates nightly builds for:
 - **Windows** and **Ubuntu** operating systems
-- **Multiple GPU targets**: `gfx1151`, `gfx120X`, `gfx110X`
+- **Multiple GPU targets**: `gfx1151`, `gfx1150`, `gfx110X`, `gfx120X`
 - **ROCm™ 7 built-in** - complete runtime libraries included
 
 
 | GPU Target | Ubuntu | Windows |
 |-------------|--------|---------|
 | **gfx110X** | [![Download Ubuntu gfx110X](https://img.shields.io/badge/Download-Ubuntu%20gfx110X-blue)](https://github.com/aigdat/llamacpp-rocm/releases/latest) | [![Download Windows gfx110X](https://img.shields.io/badge/Download-Windows%20gfx110X-green)](https://github.com/aigdat/llamacpp-rocm/releases/latest) |
+| **gfx1150** | [![Download Ubuntu gfx1150](https://img.shields.io/badge/Download-Ubuntu%20gfx1150-blue)](https://github.com/aigdat/llamacpp-rocm/releases/latest) | [![Download Windows gfx1150](https://img.shields.io/badge/Download-Windows%20gfx1150-green)](https://github.com/aigdat/llamacpp-rocm/releases/latest) |
 | **gfx1151** | [![Download Ubuntu gfx1151](https://img.shields.io/badge/Download-Ubuntu%20gfx1151-blue)](https://github.com/aigdat/llamacpp-rocm/releases/latest) | [![Download Windows gfx1151](https://img.shields.io/badge/Download-Windows%20gfx1151-green)](https://github.com/aigdat/llamacpp-rocm/releases/latest) |
 | **gfx120X** | [![Download Ubuntu gfx120X](https://img.shields.io/badge/Download-Ubuntu%20gfx120X-blue)](https://github.com/aigdat/llamacpp-rocm/releases/latest) | [![Download Windows gfx120X](https://img.shields.io/badge/Download-Windows%20gfx120X-green)](https://github.com/aigdat/llamacpp-rocm/releases/latest) |
 

diff --git a/docs/manual_instructions.md b/docs/manual_instructions.md
@@ -31,16 +31,13 @@ I used chocolatey, but you can also install those manually.
 
 ### Part 2: Organizing artifacts
 * Step 1: Get the latest run id from main [here](https://github.com/ROCm/TheRock/actions/workflows/release_windows_packages.yml).
-  * Example: [TheRock/actions/runs/16218534118/job/45793425858](https://github.com/ROCm/TheRock/actions/runs/16218534118/job/45793425858)
+  * Example: [TheRock/actions/runs/19952310972/job/57214748637](https://github.com/ROCm/TheRock/actions/runs/19952310972/job/57214748637)
 * Step 2: Look at the upload logs for your target GPU (e.g., `gfx1151`), and note the Windows URL:
-  ```
-  ://therock-nightly-tarball/therock-dist-windows-gfx1151-7.0.0rc20250711.tar.gz
-  ```
-* Step 4: Download the nightly tarball 
-  * Example: `therock-nightly-tarball.s3.amazonaws.com/YOUR_FILE`
-* Step 5: Extract the contents of this tar.gz file to `C:\opt\rocm`
-* Setp 6: Add `C:\opt\rocm\lib\llvm\bin` to path
-* Step 7: clone llamacpp
+* Step 3: Download the nightly tarball for Windows
+  * Example: [therock-nightly-tarball.s3.amazonaws.com/YOUR_WINDOWS_FILE](https://therock-nightly-tarball.s3.amazonaws.com/therock-dist-windows-gfx1151-7.11.0a20251205.tar.gz)
+* Step 4: Extract the contents of this tar.gz file to `C:\opt\rocm`
+* Step 5: Add `C:\opt\rocm\lib\llvm\bin` to path
+* Step 6: Clone llama.cpp
 
 ### Part 3: Building Llama.cpp + ROCm
 
@@ -53,11 +50,11 @@ set HIP_PLATFORM=amd
 cd "C:\<YOUR_LLAMACPP_PATH>\llama.cpp"
 mkdir build
 cd build
-cmake .. -G Ninja -DCMAKE_C_COMPILER="C:\opt\rocm\lib\llvm\bin\clang.exe" -DCMAKE_CXX_COMPILER="C:\opt\rocm\lib\llvm\bin\clang++.exe" -DCMAKE_CROSSCOMPILING=ON -DCMAKE_BUILD_TYPE=Release -DAMDGPU_TARGETS="gfx1151" -DBUILD_SHARED_LIBS=ON -DLLAMA_BUILD_TESTS=OFF -DGGML_HIP=ON -DGGML_OPENMP=OFF -DGGML_CUDA_FORCE_CUBLAS=OFF -DGGML_HIP_ROCWMMA_FATTN=ON -DGGML_HIP_FORCE_ROCWMMA_FATTN_GFX12=OFF -DLLAMA_CURL=OFF -DGGML_NATIVE=OFF -DGGML_STATIC=OFF -DCMAKE_SYSTEM_NAME=Windows
+cmake .. -G Ninja -DCMAKE_C_COMPILER="C:\opt\rocm\lib\llvm\bin\clang.exe" -DCMAKE_CXX_COMPILER="C:\opt\rocm\lib\llvm\bin\clang++.exe" -DCMAKE_CXX_FLAGS="-IC:\opt\rocm\include" -DCMAKE_CROSSCOMPILING=ON -DCMAKE_BUILD_TYPE=Release -DGPU_TARGETS="gfx1151" -DBUILD_SHARED_LIBS=ON -DLLAMA_BUILD_TESTS=OFF -DGGML_HIP=ON -DGGML_OPENMP=OFF -DGGML_CUDA_FORCE_CUBLAS=OFF -DGGML_HIP_ROCWMMA_FATTN=ON -DLLAMA_CURL=OFF -DGGML_NATIVE=OFF -DGGML_STATIC=OFF -DCMAKE_SYSTEM_NAME=Windows
 cmake --build . -j 24 2>&1 | findstr /i "error"
 ```
 
-> **Note**: Adjust the `-DAMDGPU_TARGETS="gfx1151"` parameter for your specific GPU. See the [GPU Target Reference](#gpu-target-reference) section for details.
+> **Note**: Adjust the `-DGPU_TARGETS="gfx1151"` parameter for your specific GPU. See the [GPU Target Reference](#gpu-target-reference) section for details.
 
 If you see no errors, that means that llama.cpp has correctly been built and files are available inside your `build\bin` folder. 
 
@@ -82,14 +79,14 @@ sudo apt install -y cmake ninja-build git wget
 * Step 1: Get the latest run id from main [here](https://github.com/ROCm/TheRock/actions/workflows/release_windows_packages.yml) (same as Windows).
 * Step 2: Look at the upload logs for your target GPU (e.g., `gfx1151`), but note the Linux URLs:
   ```
-  ://therock-nightly-tarball/therock-dist-linux-gfx1151-7.0.0rc20250711.tar.gz
+  ://therock-nightly-tarball/therock-dist-linux-gfx1151-7.11.0a20251205.tar.gz
   ```
 * Step 3: Download the nightly tarball for Linux
-  * Example: [therock-nightly-tarball.s3.amazonaws.com/YOUR_LINUX_FILE](https://therock-nightly-tarball.s3.amazonaws.com/therock-dist-linux-gfx1151-7.0.0rc20250711.tar.gz)
+  * Example: [therock-nightly-tarball.s3.amazonaws.com/YOUR_LINUX_FILE](https://therock-nightly-tarball.s3.amazonaws.com/therock-dist-linux-gfx1151-7.11.0a20251205.tar.gz)
 * Step 4: Extract the contents of this tar.gz file to `/opt/rocm`:
   ```bash
   sudo mkdir -p /opt/rocm
-  sudo tar -xzf therock-dist-linux-gfx1151-7.0.0rc20250711.tar.gz -C /opt/rocm --strip-components=1
+  sudo tar -xzf therock-dist-linux-gfx1151-7.11.0a20251205.tar.gz -C /opt/rocm --strip-components=1
   ```
 * Step 5: Set up ROCm environment variables:
   ```bash
@@ -127,9 +124,10 @@ cd build
 cmake .. -G Ninja \
   -DCMAKE_C_COMPILER=/opt/rocm/llvm/bin/clang \
   -DCMAKE_CXX_COMPILER=/opt/rocm/llvm/bin/clang++ \
+  -DCMAKE_CXX_FLAGS="-I/opt/rocm/include" \
   -DCMAKE_CROSSCOMPILING=ON \
   -DCMAKE_BUILD_TYPE=Release \
-  -DAMDGPU_TARGETS="gfx1151" \
+  -DGPU_TARGETS="gfx1151" \
   -DBUILD_SHARED_LIBS=ON \
   -DLLAMA_BUILD_TESTS=OFF \
   -DGGML_HIP=ON \
@@ -145,7 +143,7 @@ cmake .. -G Ninja \
 cmake --build . -j $(nproc)
 ```
 
-> **Note**: Adjust the `-DAMDGPU_TARGETS="gfx1151"` parameter for your specific GPU. See the [GPU Target Reference](#gpu-target-reference) section for details.
+> **Note**: Adjust the `-DGPU_TARGETS="gfx1151"` parameter for your specific GPU. See the [GPU Target Reference](#gpu-target-reference) section for details.
 
 ### Part 4: Copy required ROCm libraries
 
@@ -179,25 +177,29 @@ If you see no errors during the build process, llama.cpp has been successfully c
 
 ## 🎯 GPU Target Reference
 
-When building llama.cpp with ROCm, the `-DAMDGPU_TARGETS` parameter must be set based on your specific GPU architecture. Our automated workflow uses generic targets that get mapped to specific architectures:
+When building llama.cpp with ROCm, the `-DGPU_TARGETS` parameter must be set based on your specific GPU architecture. Our automated workflow uses generic targets that get mapped to specific architectures:
 
 - **`gfx120X`** maps to `gfx1200, gfx1201` (RDNA4 series like: RX 9070 XT/GRE/9070, RX 9060 XT/9060)
 - **`gfx110X`** maps to `gfx1100, gfx1101, gfx1102` (RDNA3 series like: PRO W7900/W7800/W7700/W7600, RX 7900 XTX/XT/GRE, RX 7800 XT, RX 7700 XT/7700, RX 7600 XT/7600)  
+- **`gfx1150`** remains as `gfx1150` (Strix Point)
 - **`gfx1151`** remains as `gfx1151` (Strix Halo)
 
 For a complete list of GPU targets and their mappings, see the [automated workflow](../.github/workflows/build-llamacpp-rocm.yml).
 
 ### How to Use
 
-Replace the `-DAMDGPU_TARGETS="gfx1151"` parameter in your cmake command with the appropriate target for your GPU:
+Replace the `-DGPU_TARGETS="gfx1151"` parameter in your cmake command with the appropriate target for your GPU:
 
 ```bash
 # For RDNA4 series (RX 9070 XT/GRE/9070, RX 9060 XT/9060)
--DAMDGPU_TARGETS="gfx1200, gfx1201"
+-DGPU_TARGETS="gfx1200;gfx1201"
 
 # For RDNA3 series (PRO W7900/W7800/W7700/W7600, RX 7900 XTX/XT/GRE, RX 7800 XT, RX 7700 XT/7700, RX 7600 XT/7600) 
--DAMDGPU_TARGETS="gfx1100"
+-DGPU_TARGETS="gfx1100;gfx1101;gfx1102"
+
+# For Strix Point
+-DGPU_TARGETS="gfx1150"
 
 # For Strix Halo
--DAMDGPU_TARGETS="gfx1151"
+-DGPU_TARGETS="gfx1151"
 ```