diff --git a/.github/workflows/build-llamacpp-rocm.yml b/.github/workflows/build-llamacpp-rocm.yml index f4793d9..0478b91 100644 --- a/.github/workflows/build-llamacpp-rocm.yml +++ b/.github/workflows/build-llamacpp-rocm.yml @@ -9,9 +9,9 @@ on: gfx_target: description: 'AMD GPU targets (comma-separated)' required: false - default: 'gfx1151,gfx120X,gfx110X' + default: 'gfx1151,gfx1150,gfx120X,gfx110X' rocm_version: - description: 'ROCm version to use (e.g., 7.10.0a20251022) or "latest" to auto-detect' + description: 'ROCm version to use (e.g., 7.11.0a20251205) or "latest" to auto-detect' required: false default: 'latest' llamacpp_version: @@ -36,7 +36,7 @@ on: env: OPERATING_SYSTEMS: ${{ github.event.inputs.operating_systems || 'windows,ubuntu' }} - GFX_TARGETS: ${{ github.event.inputs.gfx_target || 'gfx1151,gfx120X,gfx110X' }} + GFX_TARGETS: ${{ github.event.inputs.gfx_target || 'gfx1151,gfx1150,gfx120X,gfx110X' }} ROCM_VERSION: ${{ github.event.inputs.rocm_version || 'latest' }} LLAMACPP_VERSION: ${{ github.event.inputs.llamacpp_version || 'latest' }} @@ -241,31 +241,7 @@ jobs: # Extract the tarball tar -xzf rocm.tar.gz -C C:\opt\rocm --strip-components=1 - - #- name: Install rocWMMA headers (develop) - # run: | - # $repoUrl = "https://github.com/ROCm/rocWMMA.git" - # $cloneDir = Join-Path $env:TEMP "rocWMMA-develop" - # $destIncludeDir = "C:\opt\rocm\include" - # $destHeaders = Join-Path $destIncludeDir "rocwmma" - # - # if (Test-Path $cloneDir) { Remove-Item -Recurse -Force $cloneDir } - # Write-Host "Cloning rocWMMA develop branch..." - # git clone --depth 1 --single-branch --branch develop $repoUrl $cloneDir - # - # $srcHeaders = Join-Path $cloneDir "library\include\rocwmma" - # if (!(Test-Path $srcHeaders)) { - # Write-Error "rocWMMA headers not found at expected path: $srcHeaders" - # exit 1 - # } - # - # Write-Host "Installing headers to $destHeaders ..." 
- # New-Item -ItemType Directory -Force -Path $destIncludeDir | Out-Null - # if (Test-Path $destHeaders) { Remove-Item -Recurse -Force $destHeaders } - # Copy-Item -Recurse -Force $srcHeaders $destHeaders - # - # Write-Host "rocWMMA headers (develop) installed successfully." - + - name: Clone llama.cpp run: | $llamacppVersion = "${{ env.LLAMACPP_VERSION }}" @@ -287,13 +263,6 @@ jobs: Write-Host "Current llama.cpp commit:" git log --oneline -1 - # - name: Apply rocWMMA patch (non-interactive) - # shell: bash - # run: | - # chmod +x utils/rocwmma_patch.sh - # # Run non-interactively; keep backups (answer 'n') - # printf "n\n" | ./utils/rocwmma_patch.sh ./llama.cpp - - name: Build Llama.cpp + ROCm shell: cmd run: | @@ -306,6 +275,8 @@ jobs: set "mapped_target=gfx1100;gfx1101;gfx1102" ) else if "%current_target%"=="gfx1151" ( set "mapped_target=gfx1151" + ) else if "%current_target%"=="gfx1150" ( + set "mapped_target=gfx1150" ) else if "%current_target%"=="gfx120X" ( set "mapped_target=gfx1200;gfx1201" ) else ( @@ -332,15 +303,16 @@ jobs: cmake .. 
-G Ninja ^ -DCMAKE_C_COMPILER="C:\opt\rocm\lib\llvm\bin\clang.exe" ^ -DCMAKE_CXX_COMPILER="C:\opt\rocm\lib\llvm\bin\clang++.exe" ^ + -DCMAKE_CXX_FLAGS="-IC:\opt\rocm\include" ^ -DCMAKE_CROSSCOMPILING=ON ^ -DCMAKE_BUILD_TYPE=Release ^ - -DAMDGPU_TARGETS="%mapped_target%" ^ + -DGPU_TARGETS="%mapped_target%" ^ -DBUILD_SHARED_LIBS=ON ^ -DLLAMA_BUILD_TESTS=OFF ^ -DGGML_HIP=ON ^ -DGGML_OPENMP=OFF ^ -DGGML_CUDA_FORCE_CUBLAS=OFF ^ - -DGGML_HIP_ROCWMMA_FATTN=OFF ^ + -DGGML_HIP_ROCWMMA_FATTN=ON ^ -DLLAMA_CURL=OFF ^ -DGGML_NATIVE=OFF ^ -DGGML_STATIC=OFF ^ @@ -687,24 +659,7 @@ jobs: echo "PKG_CONFIG_PATH=/opt/rocm/lib/pkgconfig:${PKG_CONFIG_PATH:-}" >> $GITHUB_ENV echo "ROCm environment variables set successfully" - - #- name: Install rocWMMA headers (develop) - # run: | - # set -e - # TMP_DIR="$(mktemp -d)" - # echo "Using temp directory: $TMP_DIR" - # git clone --depth 1 --single-branch --branch develop https://github.com/ROCm/rocWMMA.git "$TMP_DIR/rocWMMA" - # SRC_DIR="$TMP_DIR/rocWMMA/library/include/rocwmma" - # if [ ! -d "$SRC_DIR" ]; then - # echo "rocWMMA headers not found at expected path: $SRC_DIR" - # exit 1 - # fi - # echo "Installing headers to /opt/rocm/include/rocwmma ..." - # sudo mkdir -p /opt/rocm/include - # sudo rm -rf /opt/rocm/include/rocwmma - # sudo cp -r "$SRC_DIR" /opt/rocm/include/ - # rm -rf "$TMP_DIR" - # echo "rocWMMA headers (develop) installed successfully." 
+ - name: Clone llama.cpp run: | @@ -727,11 +682,6 @@ jobs: echo "Current llama.cpp commit:" git log --oneline -1 - #- name: Apply rocWMMA patch (non-interactive) - # run: | - # chmod +x utils/rocwmma_patch.sh - # # Run non-interactively; keep backups (answer 'n') - # printf "n\n" | ./utils/rocwmma_patch.sh ./llama.cpp - name: Build Llama.cpp + ROCm run: | @@ -743,6 +693,8 @@ jobs: mapped_target="gfx1100;gfx1101;gfx1102" elif [ "$current_target" = "gfx1151" ]; then mapped_target="gfx1151" + elif [ "$current_target" = "gfx1150" ]; then + mapped_target="gfx1150" elif [ "$current_target" = "gfx120X" ]; then mapped_target="gfx1200;gfx1201" else @@ -759,15 +711,16 @@ jobs: cmake .. -G Ninja \ -DCMAKE_C_COMPILER=/opt/rocm/llvm/bin/clang \ -DCMAKE_CXX_COMPILER=/opt/rocm/llvm/bin/clang++ \ + -DCMAKE_CXX_FLAGS="-I/opt/rocm/include" \ -DCMAKE_CROSSCOMPILING=ON \ -DCMAKE_BUILD_TYPE=Release \ - -DAMDGPU_TARGETS="$mapped_target" \ + -DGPU_TARGETS="$mapped_target" \ -DBUILD_SHARED_LIBS=ON \ -DLLAMA_BUILD_TESTS=OFF \ -DGGML_HIP=ON \ -DGGML_OPENMP=OFF \ -DGGML_CUDA_FORCE_CUBLAS=OFF \ - -DGGML_HIP_ROCWMMA_FATTN=OFF \ + -DGGML_HIP_ROCWMMA_FATTN=ON \ -DLLAMA_CURL=OFF \ -DGGML_NATIVE=OFF \ -DGGML_STATIC=OFF \ @@ -931,7 +884,7 @@ jobs: needs: [prepare-matrix, build-windows] if: | needs.build-windows.result == 'success' && - contains(github.event.inputs.gfx_target || 'gfx1151,gfx120X,gfx110X', 'gfx1151') && + contains(github.event.inputs.gfx_target || 'gfx1151,gfx1150,gfx120X,gfx110X', 'gfx1151') && needs.prepare-matrix.outputs.should_build_windows == 'true' steps: @@ -1053,7 +1006,7 @@ jobs: needs: [prepare-matrix, build-ubuntu] if: | needs.build-ubuntu.result == 'success' && - contains(github.event.inputs.gfx_target || 'gfx1151,gfx120X,gfx110X', 'gfx1151') && + contains(github.event.inputs.gfx_target || 'gfx1151,gfx1150,gfx120X,gfx110X', 'gfx1151') && needs.prepare-matrix.outputs.should_build_ubuntu == 'true' steps: diff --git a/README.md b/README.md index 799da4b..8152388 100644 --- 
a/README.md +++ b/README.md @@ -31,7 +31,8 @@ We provide nightly builds of **llama.cpp** with **AMD ROCm™ 7** acceleration b ## 🎯 Supported Devices This build specifically targets the following GPU architectures: -- **gfx1151** (STX Halo GPUs) - Ryzen AI MAX+ Pro 395 +- **gfx1151** (STX Halo APU) - Ryzen AI MAX+ Pro 395 +- **gfx1150** (STX Point APU) - Ryzen AI 300 - **gfx120X** (RDNA4 GPUs) - includes AMD Radeon RX 9070 XT/GRE/9070, RX 9060 XT/9060 - **gfx110X** (RDNA3 GPUs) - includes AMD Radeon PRO W7900/W7800/W7700/W7600, RX 7900 XTX/XT/GRE, RX 7800 XT, RX 7700 XT/7700, RX 7600 XT/7600 @@ -41,13 +42,14 @@ This build specifically targets the following GPU architectures: Our automated GitHub Actions workflow creates nightly builds for: - **Windows** and **Ubuntu** operating systems -- **Multiple GPU targets**: `gfx1151`, `gfx120X`, `gfx110X` +- **Multiple GPU targets**: `gfx1151`, `gfx1150`, `gfx110X`, `gfx120X` - **ROCm™ 7 built-in** - complete runtime libraries included | GPU Target | Ubuntu | Windows | |-------------|--------|---------| | **gfx110X** | [![Download Ubuntu gfx110X](https://img.shields.io/badge/Download-Ubuntu%20gfx110X-blue)](https://github.com/aigdat/llamacpp-rocm/releases/latest) | [![Download Windows gfx110X](https://img.shields.io/badge/Download-Windows%20gfx110X-green)](https://github.com/aigdat/llamacpp-rocm/releases/latest) | +| **gfx1150** | [![Download Ubuntu gfx1150](https://img.shields.io/badge/Download-Ubuntu%20gfx1150-blue)](https://github.com/aigdat/llamacpp-rocm/releases/latest) | [![Download Windows gfx1150](https://img.shields.io/badge/Download-Windows%20gfx1150-green)](https://github.com/aigdat/llamacpp-rocm/releases/latest) | | **gfx1151** | [![Download Ubuntu gfx1151](https://img.shields.io/badge/Download-Ubuntu%20gfx1151-blue)](https://github.com/aigdat/llamacpp-rocm/releases/latest) | [![Download Windows 
gfx1151](https://img.shields.io/badge/Download-Windows%20gfx1151-green)](https://github.com/aigdat/llamacpp-rocm/releases/latest) | | **gfx120X** | [![Download Ubuntu gfx120X](https://img.shields.io/badge/Download-Ubuntu%20gfx120X-blue)](https://github.com/aigdat/llamacpp-rocm/releases/latest) | [![Download Windows gfx120X](https://img.shields.io/badge/Download-Windows%20gfx120X-green)](https://github.com/aigdat/llamacpp-rocm/releases/latest) | diff --git a/docs/manual_instructions.md b/docs/manual_instructions.md index 9b4c9fe..66d7a41 100644 --- a/docs/manual_instructions.md +++ b/docs/manual_instructions.md @@ -31,16 +31,13 @@ I used chocolatey, but you can also install those manually. ### Part 2: Organizing artifacts * Step 1: Get the latest run id from main [here](https://github.com/ROCm/TheRock/actions/workflows/release_windows_packages.yml). - * Example: [TheRock/actions/runs/16218534118/job/45793425858](https://github.com/ROCm/TheRock/actions/runs/16218534118/job/45793425858) + * Example: [TheRock/actions/runs/19952310972/job/57214748637](https://github.com/ROCm/TheRock/actions/runs/19952310972/job/57214748637) * Step 2: Look at the upload logs for your target GPU (e.g., `gfx1151`), and note the Windows URL: - ``` - ://therock-nightly-tarball/therock-dist-windows-gfx1151-7.0.0rc20250711.tar.gz - ``` -* Step 4: Download the nightly tarball - * Example: `therock-nightly-tarball.s3.amazonaws.com/YOUR_FILE` -* Step 5: Extract the contents of this tar.gz file to `C:\opt\rocm` -* Setp 6: Add `C:\opt\rocm\lib\llvm\bin` to path -* Step 7: clone llamacpp +* Step 3: Download the nightly tarball for Windows + * Example: [therock-nightly-tarball.s3.amazonaws.com/YOUR_WINDOWS_FILE](https://therock-nightly-tarball.s3.amazonaws.com/therock-dist-windows-gfx1151-7.11.0a20251205.tar.gz) +* Step 4: Extract the contents of this tar.gz file to `C:\opt\rocm` +* Step 5: Add `C:\opt\rocm\lib\llvm\bin` to path +* Step 6: clone llamacpp ### Part 3: Building Llama.cpp + ROCm @@ 
-53,11 +50,11 @@ set HIP_PLATFORM=amd cd "C:\\llama.cpp" mkdir build cd build -cmake .. -G Ninja -DCMAKE_C_COMPILER="C:\opt\rocm\lib\llvm\bin\clang.exe" -DCMAKE_CXX_COMPILER="C:\opt\rocm\lib\llvm\bin\clang++.exe" -DCMAKE_CROSSCOMPILING=ON -DCMAKE_BUILD_TYPE=Release -DAMDGPU_TARGETS="gfx1151" -DBUILD_SHARED_LIBS=ON -DLLAMA_BUILD_TESTS=OFF -DGGML_HIP=ON -DGGML_OPENMP=OFF -DGGML_CUDA_FORCE_CUBLAS=OFF -DGGML_HIP_ROCWMMA_FATTN=ON -DGGML_HIP_FORCE_ROCWMMA_FATTN_GFX12=OFF -DLLAMA_CURL=OFF -DGGML_NATIVE=OFF -DGGML_STATIC=OFF -DCMAKE_SYSTEM_NAME=Windows +cmake .. -G Ninja -DCMAKE_C_COMPILER="C:\opt\rocm\lib\llvm\bin\clang.exe" -DCMAKE_CXX_COMPILER="C:\opt\rocm\lib\llvm\bin\clang++.exe" -DCMAKE_CXX_FLAGS="-IC:\opt\rocm\include" -DCMAKE_CROSSCOMPILING=ON -DCMAKE_BUILD_TYPE=Release -DGPU_TARGETS="gfx1151" -DBUILD_SHARED_LIBS=ON -DLLAMA_BUILD_TESTS=OFF -DGGML_HIP=ON -DGGML_OPENMP=OFF -DGGML_CUDA_FORCE_CUBLAS=OFF -DGGML_HIP_ROCWMMA_FATTN=ON -DLLAMA_CURL=OFF -DGGML_NATIVE=OFF -DGGML_STATIC=OFF -DCMAKE_SYSTEM_NAME=Windows cmake --build . -j 24 2>&1 | findstr /i "error" ``` -> **Note**: Adjust the `-DAMDGPU_TARGETS="gfx1151"` parameter for your specific GPU. See the [GPU Target Reference](#gpu-target-reference) section for details. +> **Note**: Adjust the `-DGPU_TARGETS="gfx1151"` parameter for your specific GPU. See the [GPU Target Reference](#gpu-target-reference) section for details. If you see no errors, that means that llama.cpp has correctly been built and files are available inside your `build\bin` folder. @@ -82,14 +79,14 @@ sudo apt install -y cmake ninja-build git wget * Step 1: Get the latest run id from main [here](https://github.com/ROCm/TheRock/actions/workflows/release_windows_packages.yml) (same as Windows). 
* Step 2: Look at the upload logs for your target GPU (e.g., `gfx1151`), but note the Linux URLs: ``` - ://therock-nightly-tarball/therock-dist-linux-gfx1151-7.0.0rc20250711.tar.gz + ://therock-nightly-tarball/therock-dist-linux-gfx1151-7.11.0a20251205.tar.gz ``` * Step 3: Download the nightly tarball for Linux - * Example: [therock-nightly-tarball.s3.amazonaws.com/YOUR_LINUX_FILE](https://therock-nightly-tarball.s3.amazonaws.com/therock-dist-linux-gfx1151-7.0.0rc20250711.tar.gz) + * Example: [therock-nightly-tarball.s3.amazonaws.com/YOUR_LINUX_FILE](https://therock-nightly-tarball.s3.amazonaws.com/therock-dist-linux-gfx1151-7.11.0a20251205.tar.gz) * Step 4: Extract the contents of this tar.gz file to `/opt/rocm`: ```bash sudo mkdir -p /opt/rocm - sudo tar -xzf therock-dist-linux-gfx1151-7.0.0rc20250711.tar.gz -C /opt/rocm --strip-components=1 + sudo tar -xzf therock-dist-linux-gfx1151-7.11.0a20251205.tar.gz -C /opt/rocm --strip-components=1 ``` * Step 5: Set up ROCm environment variables: ```bash @@ -127,9 +124,10 @@ cd build cmake .. -G Ninja \ -DCMAKE_C_COMPILER=/opt/rocm/llvm/bin/clang \ -DCMAKE_CXX_COMPILER=/opt/rocm/llvm/bin/clang++ \ + -DCMAKE_CXX_FLAGS="-I/opt/rocm/include" \ -DCMAKE_CROSSCOMPILING=ON \ -DCMAKE_BUILD_TYPE=Release \ - -DAMDGPU_TARGETS="gfx1151" \ + -DGPU_TARGETS="gfx1151" \ -DBUILD_SHARED_LIBS=ON \ -DLLAMA_BUILD_TESTS=OFF \ -DGGML_HIP=ON \ @@ -145,7 +143,7 @@ cmake .. -G Ninja \ cmake --build . -j $(nproc) ``` -> **Note**: Adjust the `-DAMDGPU_TARGETS="gfx1151"` parameter for your specific GPU. See the [GPU Target Reference](#gpu-target-reference) section for details. +> **Note**: Adjust the `-DGPU_TARGETS="gfx1151"` parameter for your specific GPU. See the [GPU Target Reference](#gpu-target-reference) section for details. 
### Part 4: Copy required ROCm libraries @@ -179,25 +177,29 @@ If you see no errors during the build process, llama.cpp has been successfully c ## 🎯 GPU Target Reference -When building llama.cpp with ROCm, the `-DAMDGPU_TARGETS` parameter must be set based on your specific GPU architecture. Our automated workflow uses generic targets that get mapped to specific architectures: +When building llama.cpp with ROCm, the `-DGPU_TARGETS` parameter must be set based on your specific GPU architecture. Our automated workflow uses generic targets that get mapped to specific architectures: - **`gfx120X`** maps to `gfx1200, gfx1201` (RDNA4 series like: RX 9070 XT/GRE/9070, RX 9060 XT/9060) - **`gfx110X`** maps to `gfx1100, gfx1101, gfx1102` (RDNA3 series like: PRO W7900/W7800/W7700/W7600, RX 7900 XTX/XT/GRE, RX 7800 XT, RX 7700 XT/7700, RX 7600 XT/7600) +- **`gfx1150`** remains as `gfx1150` (Strix Point) - **`gfx1151`** remains as `gfx1151` (Strix Halo) For a complete list of GPU targets and their mappings, see the [automated workflow](../.github/workflows/build-llamacpp-rocm.yml). 
### How to Use -Replace the `-DAMDGPU_TARGETS="gfx1151"` parameter in your cmake command with the appropriate target for your GPU: +Replace the `-DGPU_TARGETS="gfx1151"` parameter in your cmake command with the appropriate target for your GPU: ```bash # For RDNA4 series (RX 9070 XT/GRE/9070, RX 9060 XT/9060) --DAMDGPU_TARGETS="gfx1200, gfx1201" +-DGPU_TARGETS="gfx1200;gfx1201" # For RDNA3 series (PRO W7900/W7800/W7700/W7600, RX 7900 XTX/XT/GRE, RX 7800 XT, RX 7700 XT/7700, RX 7600 XT/7600) --DAMDGPU_TARGETS="gfx1100" +-DGPU_TARGETS="gfx1100;gfx1101;gfx1102" + +# For Strix Point +-DGPU_TARGETS="gfx1150" # For Strix Halo --DAMDGPU_TARGETS="gfx1151" +-DGPU_TARGETS="gfx1151" ``` diff --git a/utils/rocwmma_patch.sh b/utils/rocwmma_patch.sh deleted file mode 100644 index 2cd582a..0000000 --- a/utils/rocwmma_patch.sh +++ /dev/null @@ -1,165 +0,0 @@ -#!/bin/bash - -# rocwmma_patch.sh - Apply rocWMMA compatibility fixes to llama.cpp -# Usage: ./rocwmma_patch.sh - -set -euo pipefail - -SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" -LLAMA_DIR="${1:-}" - -if [[ -z "$LLAMA_DIR" ]]; then - echo "Usage: $0 " - echo "" - echo "This script applies rocWMMA compatibility fixes to a llama.cpp checkout." - echo "The fixes resolve warp synchronization mask type conflicts between" - echo "ROCm headers and CUDA-style code when building with GGML_HIP_ROCWMMA_FATTN=ON." - echo "" - echo "Example:" - echo " $0 ./llama.cpp" - echo " $0 /path/to/your/llama.cpp" - exit 1 -fi - -if [[ ! -d "$LLAMA_DIR" ]]; then - echo "Error: Directory '$LLAMA_DIR' does not exist" - exit 1 -fi - -if [[ ! -f "$LLAMA_DIR/CMakeLists.txt" ]] || ! grep -q "llama" "$LLAMA_DIR/CMakeLists.txt" 2>/dev/null; then - echo "Error: '$LLAMA_DIR' does not appear to be a llama.cpp directory" - echo "Expected to find CMakeLists.txt with 'llama' references" - exit 1 -fi - -VENDOR_HIP_FILE="$LLAMA_DIR/ggml/src/ggml-cuda/vendors/hip.h" - -if [[ ! 
-f "$VENDOR_HIP_FILE" ]]; then - echo "Error: HIP vendor header not found at: $VENDOR_HIP_FILE" - echo "This script requires a llama.cpp version with HIP support" - exit 1 -fi - -echo "Applying rocWMMA compatibility fixes to: $LLAMA_DIR" -echo "" - -# Check if fixes are already applied -if grep -q "GGML_HIP_WARP_MASK" "$VENDOR_HIP_FILE" 2>/dev/null; then - echo "rocWMMA fixes appear to already be applied (found GGML_HIP_WARP_MASK)" - echo "To reapply, please first revert changes and run this script again" - exit 0 -fi - -echo "Step 1: Modifying HIP vendor header..." - -# Backup the original file -cp "$VENDOR_HIP_FILE" "$VENDOR_HIP_FILE.backup" - -# Find the line with __shfl_sync and __shfl_xor_sync definitions -SHFL_LINE=$(grep -n "^#define __shfl_sync" "$VENDOR_HIP_FILE" | head -1 | cut -d: -f1) - -if [[ -z "$SHFL_LINE" ]]; then - echo "Error: Could not find __shfl_sync macro definition in $VENDOR_HIP_FILE" - echo "This script may need updates for this version of llama.cpp" - exit 1 -fi - -# Create a temporary file with the fix -{ - # Print lines before the __shfl_sync definition - head -n $((SHFL_LINE - 1)) "$VENDOR_HIP_FILE" - - # Add our conditional compilation block - cat << 'EOF' -#ifdef GGML_HIP_ROCWMMA_FATTN -// ROCm requires 64-bit masks for __shfl_*_sync functions -#define GGML_HIP_WARP_MASK 0xFFFFFFFFFFFFFFFFULL -#else -#define __shfl_sync(mask, var, laneMask, width) __shfl(var, laneMask, width) -#define __shfl_xor_sync(mask, var, laneMask, width) __shfl_xor(var, laneMask, width) -#define GGML_HIP_WARP_MASK 0xFFFFFFFF -#endif -EOF - - # Skip the original __shfl_sync and __shfl_xor_sync lines and print the rest - tail -n +$((SHFL_LINE + 2)) "$VENDOR_HIP_FILE" - -} > "$VENDOR_HIP_FILE.tmp" - -mv "$VENDOR_HIP_FILE.tmp" "$VENDOR_HIP_FILE" - -echo " βœ“ Added conditional GGML_HIP_WARP_MASK macro to vendor header" - -echo "" -echo "Step 2: Replacing hardcoded warp masks in CUDA files..." 
- -# Find all .cu and .cuh files in the ggml/src/ggml-cuda directory -CUDA_FILES=($(find "$LLAMA_DIR/ggml/src/ggml-cuda" -name "*.cu" -o -name "*.cuh" 2>/dev/null | sort)) - -if [[ ${#CUDA_FILES[@]} -eq 0 ]]; then - echo "Warning: No CUDA files found in $LLAMA_DIR/ggml/src/ggml-cuda" - echo "This may be expected for some llama.cpp versions" -else - MODIFIED_COUNT=0 - - for file in "${CUDA_FILES[@]}"; do - # Check if file contains the hardcoded masks - if grep -q "0xFFFFFFFF\|0xffffffff" "$file" 2>/dev/null; then - # Create backup - cp "$file" "$file.backup" - - # Replace both uppercase and lowercase versions - sed -i 's/0xFFFFFFFF/GGML_HIP_WARP_MASK/g; s/0xffffffff/GGML_HIP_WARP_MASK/g' "$file" - - MODIFIED_COUNT=$((MODIFIED_COUNT + 1)) - echo " βœ“ Modified: $(basename "$file")" - fi - done - - echo " βœ“ Modified $MODIFIED_COUNT CUDA files" -fi - -echo "" -echo "Step 3: Verification..." - -# Verify the vendor header was modified correctly -if grep -q "GGML_HIP_ROCWMMA_FATTN" "$VENDOR_HIP_FILE" && grep -q "GGML_HIP_WARP_MASK" "$VENDOR_HIP_FILE"; then - echo " βœ“ Vendor header modification verified" -else - echo " βœ— Vendor header modification failed" - # Restore backup - mv "$VENDOR_HIP_FILE.backup" "$VENDOR_HIP_FILE" - echo " βœ“ Restored original vendor header" - exit 1 -fi - -echo "" -echo "πŸŽ‰ rocWMMA compatibility fixes applied successfully!" 
-echo "" -echo "What was changed:" -echo " β€’ Added conditional GGML_HIP_WARP_MASK macro to ggml/src/ggml-cuda/vendors/hip.h" -echo " β€’ Replaced hardcoded 0xFFFFFFFF/0xffffffff with GGML_HIP_WARP_MASK in CUDA files" -echo "" -echo "Behavior:" -echo " β€’ For regular HIP builds: GGML_HIP_WARP_MASK = 0xFFFFFFFF (no change)" -echo " β€’ For rocWMMA builds: GGML_HIP_WARP_MASK = 0xFFFFFFFFFFFFFFFFULL (64-bit masks)" -echo "" -echo "To build with rocWMMA support, use:" -echo " cmake -B build -S '$LLAMA_DIR' -DGGML_HIP=ON -DAMDGPU_TARGETS=\"gfx1151\" -DGGML_HIP_ROCWMMA_FATTN=ON" -echo "" -echo "Backup files were created with .backup extension in case you need to revert." - -# Clean up backup files from CUDA directory on success -echo "" -read -p "Remove backup files? (y/N): " -n 1 -r -echo -if [[ $REPLY =~ ^[Yy]$ ]]; then - find "$LLAMA_DIR/ggml/src/ggml-cuda" -name "*.backup" -delete 2>/dev/null || true - rm -f "$VENDOR_HIP_FILE.backup" - echo " βœ“ Backup files removed" -else - echo " β„Ή Backup files kept for safety" -fi - -echo "" -echo "Done! Your llama.cpp checkout now supports rocWMMA builds." \ No newline at end of file