diff --git a/.github/workflows/build-llamacpp-rocm.yml b/.github/workflows/build-llamacpp-rocm.yml index f134c58..1ca7aa6 100644 --- a/.github/workflows/build-llamacpp-rocm.yml +++ b/.github/workflows/build-llamacpp-rocm.yml @@ -218,6 +218,30 @@ jobs: # Extract the tarball tar -xzf rocm.tar.gz -C C:\opt\rocm --strip-components=1 + - name: Install rocWMMA headers (develop) + run: | + $repoUrl = "https://github.com/ROCm/rocWMMA.git" + $cloneDir = Join-Path $env:TEMP "rocWMMA-develop" + $destIncludeDir = "C:\opt\rocm\include" + $destHeaders = Join-Path $destIncludeDir "rocwmma" + + if (Test-Path $cloneDir) { Remove-Item -Recurse -Force $cloneDir } + Write-Host "Cloning rocWMMA develop branch..." + git clone --depth 1 --branch develop $repoUrl $cloneDir + + $srcHeaders = Join-Path $cloneDir "library\include\rocwmma" + if (!(Test-Path $srcHeaders)) { + Write-Error "rocWMMA headers not found at expected path: $srcHeaders" + exit 1 + } + + Write-Host "Installing headers to $destHeaders ..." + New-Item -ItemType Directory -Force -Path $destIncludeDir | Out-Null + if (Test-Path $destHeaders) { Remove-Item -Recurse -Force $destHeaders } + Copy-Item -Recurse -Force $srcHeaders $destHeaders + + Write-Host "rocWMMA headers (develop) installed successfully." + - name: Clone llama.cpp run: | git clone https://github.com/ggerganov/llama.cpp.git @@ -254,6 +278,13 @@ jobs: } else { Write-Host "Warning: $hipFile not found" } + + - name: Apply rocWMMA patch (non-interactive) + shell: bash + run: | + chmod +x utils/rocwmma_patch.sh + # Run non-interactively; keep backups (answer 'n') + printf "n\n" | ./utils/rocwmma_patch.sh ./llama.cpp - name: Build Llama.cpp + ROCm shell: cmd @@ -301,7 +332,7 @@ jobs: -DGGML_HIP=ON ^ -DGGML_OPENMP=OFF ^ -DGGML_CUDA_FORCE_CUBLAS=OFF ^ - -DGGML_HIP_ROCWMMA_FATTN=OFF ^ + -DGGML_HIP_ROCWMMA_FATTN=ON ^ -DGGML_HIP_FORCE_ROCWMMA_FATTN_GFX12=OFF ^ -DLLAMA_CURL=OFF ^ -DGGML_NATIVE=OFF ^ @@ -495,7 +526,7 @@ jobs: run: | echo "Installing build dependencies..." 
sudo apt update - sudo apt install -y cmake ninja-build + sudo apt install -y cmake ninja-build unzip curl # Verify installations echo "Verifying installations..." @@ -581,6 +612,24 @@ jobs: echo "ROCm environment variables set successfully" + - name: Install rocWMMA headers (develop) + run: | + set -e + TMP_DIR="$(mktemp -d)" + echo "Using temp directory: $TMP_DIR" + git clone --depth 1 --branch develop https://github.com/ROCm/rocWMMA.git "$TMP_DIR/rocWMMA" + SRC_DIR="$TMP_DIR/rocWMMA/library/include/rocwmma" + if [ ! -d "$SRC_DIR" ]; then + echo "rocWMMA headers not found at expected path: $SRC_DIR" + exit 1 + fi + echo "Installing headers to /opt/rocm/include/rocwmma ..." + sudo mkdir -p /opt/rocm/include + sudo rm -rf /opt/rocm/include/rocwmma + sudo cp -r "$SRC_DIR" /opt/rocm/include/ + rm -rf "$TMP_DIR" + echo "rocWMMA headers (develop) installed successfully." + - name: Clone llama.cpp run: | git clone https://github.com/ggerganov/llama.cpp.git @@ -615,6 +664,12 @@ jobs: else echo "Warning: $hip_file not found" fi + + - name: Apply rocWMMA patch (non-interactive) + run: | + chmod +x utils/rocwmma_patch.sh + # Run non-interactively; keep backups (answer 'n') + printf "n\n" | ./utils/rocwmma_patch.sh ./llama.cpp - name: Build Llama.cpp + ROCm run: | @@ -650,7 +705,7 @@ jobs: -DGGML_HIP=ON \ -DGGML_OPENMP=OFF \ -DGGML_CUDA_FORCE_CUBLAS=OFF \ - -DGGML_HIP_ROCWMMA_FATTN=OFF \ + -DGGML_HIP_ROCWMMA_FATTN=ON \ -DGGML_HIP_FORCE_ROCWMMA_FATTN_GFX12=OFF \ -DLLAMA_CURL=OFF \ -DGGML_NATIVE=OFF \ diff --git a/docs/manual_instructions.md b/docs/manual_instructions.md index 209212d..f40b03a 100644 --- a/docs/manual_instructions.md +++ b/docs/manual_instructions.md @@ -57,7 +57,7 @@ set HIP_PLATFORM=amd cd "C:\\llama.cpp" mkdir build cd build -cmake .. 
-G Ninja -DCMAKE_C_COMPILER="C:\opt\rocm\lib\llvm\bin\clang.exe" -DCMAKE_CXX_COMPILER="C:\opt\rocm\lib\llvm\bin\clang++.exe" -DCMAKE_CROSSCOMPILING=ON -DCMAKE_BUILD_TYPE=Release -DAMDGPU_TARGETS="gfx1151" -DBUILD_SHARED_LIBS=ON -DLLAMA_BUILD_TESTS=OFF -DGGML_HIP=ON -DGGML_OPENMP=OFF -DGGML_CUDA_FORCE_CUBLAS=OFF -DGGML_HIP_ROCWMMA_FATTN=OFF -DGGML_HIP_FORCE_ROCWMMA_FATTN_GFX12=OFF -DLLAMA_CURL=OFF -DGGML_NATIVE=OFF -DGGML_STATIC=OFF -DCMAKE_SYSTEM_NAME=Windows +cmake .. -G Ninja -DCMAKE_C_COMPILER="C:\opt\rocm\lib\llvm\bin\clang.exe" -DCMAKE_CXX_COMPILER="C:\opt\rocm\lib\llvm\bin\clang++.exe" -DCMAKE_CROSSCOMPILING=ON -DCMAKE_BUILD_TYPE=Release -DAMDGPU_TARGETS="gfx1151" -DBUILD_SHARED_LIBS=ON -DLLAMA_BUILD_TESTS=OFF -DGGML_HIP=ON -DGGML_OPENMP=OFF -DGGML_CUDA_FORCE_CUBLAS=OFF -DGGML_HIP_ROCWMMA_FATTN=ON -DGGML_HIP_FORCE_ROCWMMA_FATTN_GFX12=OFF -DLLAMA_CURL=OFF -DGGML_NATIVE=OFF -DGGML_STATIC=OFF -DCMAKE_SYSTEM_NAME=Windows cmake --build . -j 24 2>&1 | findstr /i "error" ``` @@ -149,7 +149,7 @@ cmake .. -G Ninja \ -DGGML_HIP=ON \ -DGGML_OPENMP=OFF \ -DGGML_CUDA_FORCE_CUBLAS=OFF \ - -DGGML_HIP_ROCWMMA_FATTN=OFF \ + -DGGML_HIP_ROCWMMA_FATTN=ON \ -DGGML_HIP_FORCE_ROCWMMA_FATTN_GFX12=OFF \ -DLLAMA_CURL=OFF \ -DGGML_NATIVE=OFF \ diff --git a/utils/rocwmma_patch.sh b/utils/rocwmma_patch.sh new file mode 100644 index 0000000..2cd582a --- /dev/null +++ b/utils/rocwmma_patch.sh @@ -0,0 +1,165 @@
+#!/bin/bash
+
+# rocwmma_patch.sh - Apply rocWMMA compatibility fixes to llama.cpp
+# Usage: ./rocwmma_patch.sh LLAMA_CPP_DIR
+
+set -euo pipefail
+
+# Checkout to patch; empty when no argument was given (handled just below)
+LLAMA_DIR="${1:-}"
+
+if [[ -z "$LLAMA_DIR" ]]; then
+    echo "Usage: $0 LLAMA_CPP_DIR"
+    echo ""
+    echo "This script applies rocWMMA compatibility fixes to a llama.cpp checkout."
+    echo "The fixes resolve warp synchronization mask type conflicts between"
+    echo "ROCm headers and CUDA-style code when building with GGML_HIP_ROCWMMA_FATTN=ON."
+    echo ""
+    echo "Example:"
+    echo " $0 ./llama.cpp"
+    echo " $0 /path/to/your/llama.cpp"
+    exit 1
+fi
+
+if [[ ! -d "$LLAMA_DIR" ]]; then
+    echo "Error: Directory '$LLAMA_DIR' does not exist"
+    exit 1
+fi
+
+if [[ ! -f "$LLAMA_DIR/CMakeLists.txt" ]] || ! grep -q "llama" "$LLAMA_DIR/CMakeLists.txt" 2>/dev/null; then
+    echo "Error: '$LLAMA_DIR' does not appear to be a llama.cpp directory"
+    echo "Expected to find CMakeLists.txt with 'llama' references"
+    exit 1
+fi
+
+VENDOR_HIP_FILE="$LLAMA_DIR/ggml/src/ggml-cuda/vendors/hip.h"
+
+if [[ ! -f "$VENDOR_HIP_FILE" ]]; then
+    echo "Error: HIP vendor header not found at: $VENDOR_HIP_FILE"
+    echo "This script requires a llama.cpp version with HIP support"
+    exit 1
+fi
+
+echo "Applying rocWMMA compatibility fixes to: $LLAMA_DIR"
+echo ""
+
+# Check if fixes are already applied
+if grep -q "GGML_HIP_WARP_MASK" "$VENDOR_HIP_FILE" 2>/dev/null; then
+    echo "rocWMMA fixes appear to already be applied (found GGML_HIP_WARP_MASK)"
+    echo "To reapply, please first revert changes and run this script again"
+    exit 0
+fi
+
+echo "Step 1: Modifying HIP vendor header..."
+
+# Locate the CUDA-style shuffle shims. "|| true" stops a no-match from tripping
+# errexit/pipefail, so the diagnostics below are actually reachable.
+SHFL_LINE=$(grep -n -m 1 "^#define __shfl_sync" "$VENDOR_HIP_FILE" | cut -d: -f1 || true)
+if [[ -z "$SHFL_LINE" ]]; then
+    echo "Error: Could not find __shfl_sync macro definition in $VENDOR_HIP_FILE"
+    echo "This script may need updates for this version of llama.cpp"
+    exit 1
+fi
+
+# Two lines are dropped below; make sure the second really is the
+# __shfl_xor_sync shim instead of deleting whatever follows __shfl_sync.
+NEXT_LINE=$(sed -n "$((SHFL_LINE + 1))p" "$VENDOR_HIP_FILE")
+if [[ "$NEXT_LINE" != "#define __shfl_xor_sync"* ]]; then
+    echo "Error: Expected __shfl_xor_sync macro right after __shfl_sync in $VENDOR_HIP_FILE"
+    exit 1
+fi
+
+# Back up the original file (only once it is known to be patchable)
+cp "$VENDOR_HIP_FILE" "$VENDOR_HIP_FILE.backup"
+
+# Rebuild the header: text above the shims, the replacement block, the rest
+{
+    head -n $((SHFL_LINE - 1)) "$VENDOR_HIP_FILE"
+    cat << 'EOF'
+#ifdef GGML_HIP_ROCWMMA_FATTN
+// ROCm requires 64-bit masks for __shfl_*_sync functions
+#define GGML_HIP_WARP_MASK 0xFFFFFFFFFFFFFFFFULL
+#else
+#define __shfl_sync(mask, var, laneMask, width) __shfl(var, laneMask, width)
+#define __shfl_xor_sync(mask, var, laneMask, width) __shfl_xor(var, laneMask, width)
+#define GGML_HIP_WARP_MASK 0xFFFFFFFF
+#endif
+EOF
+    tail -n +$((SHFL_LINE + 2)) "$VENDOR_HIP_FILE"
+} > "$VENDOR_HIP_FILE.tmp"
+mv "$VENDOR_HIP_FILE.tmp" "$VENDOR_HIP_FILE"
+
+echo " ✓ Added conditional GGML_HIP_WARP_MASK macro to vendor header"
+echo ""
+echo "Step 2: Replacing hardcoded warp masks in CUDA files..."
+
+# Collect every .cu/.cuh file under ggml/src/ggml-cuda. Reading find's output
+# line by line keeps paths with spaces or glob characters as single entries.
+CUDA_FILES=()
+while IFS= read -r cuda_file; do
+    CUDA_FILES+=("$cuda_file")
+done < <(find "$LLAMA_DIR/ggml/src/ggml-cuda" \( -name "*.cu" -o -name "*.cuh" \) 2>/dev/null | sort)
+
+if [[ ${#CUDA_FILES[@]} -eq 0 ]]; then
+    echo "Warning: No CUDA files found in $LLAMA_DIR/ggml/src/ggml-cuda"
+    echo "This may be expected for some llama.cpp versions"
+else
+    MODIFIED_COUNT=0
+    for file in "${CUDA_FILES[@]}"; do
+        # Back up and rewrite only files that contain a hardcoded 32-bit mask
+        if grep -q "0xFFFFFFFF\|0xffffffff" "$file" 2>/dev/null; then
+            cp "$file" "$file.backup"
+            sed -i 's/0xFFFFFFFF/GGML_HIP_WARP_MASK/g; s/0xffffffff/GGML_HIP_WARP_MASK/g' "$file"
+            MODIFIED_COUNT=$((MODIFIED_COUNT + 1))
+            echo " ✓ Modified: $(basename "$file")"
+        fi
+    done
+    echo " ✓ Modified $MODIFIED_COUNT CUDA files"
+fi
+
+echo ""
+echo "Step 3: Verification..."
+
+# Verify the vendor header was modified correctly
+if grep -q "GGML_HIP_ROCWMMA_FATTN" "$VENDOR_HIP_FILE" && grep -q "GGML_HIP_WARP_MASK" "$VENDOR_HIP_FILE"; then
+    echo " ✓ Vendor header modification verified"
+else
+    echo " ✗ Vendor header modification failed"
+    # Roll back the header and every CUDA file rewritten in Step 2 so the
+    # checkout is not left half-patched.
+    mv "$VENDOR_HIP_FILE.backup" "$VENDOR_HIP_FILE"
+    echo " ✓ Restored original vendor header"
+    find "$LLAMA_DIR/ggml/src/ggml-cuda" -name "*.backup" -exec sh -c 'mv "$1" "${1%.backup}"' _ {} \;
+    exit 1
+fi
+
+echo ""
+echo "🎉 rocWMMA compatibility fixes applied successfully!"
+echo ""
+echo "What was changed:"
+echo " • Added conditional GGML_HIP_WARP_MASK macro to ggml/src/ggml-cuda/vendors/hip.h"
+echo " • Replaced hardcoded 0xFFFFFFFF/0xffffffff with GGML_HIP_WARP_MASK in CUDA files"
+echo ""
+echo "Behavior:"
+echo " • For regular HIP builds: GGML_HIP_WARP_MASK = 0xFFFFFFFF (no change)"
+echo " • For rocWMMA builds: GGML_HIP_WARP_MASK = 0xFFFFFFFFFFFFFFFFULL (64-bit masks)"
+echo ""
+echo "To build with rocWMMA support, use:"
+echo " cmake -B build -S '$LLAMA_DIR' -DGGML_HIP=ON -DAMDGPU_TARGETS=\"gfx1151\" -DGGML_HIP_ROCWMMA_FATTN=ON"
+echo ""
+echo "Backup files were created with .backup extension in case you need to revert."
+
+# Offer to delete the .backup files. read fails on a closed or exhausted stdin
+# (CI, "< /dev/null"); treat that as "no" so errexit cannot fail a finished run.
+echo ""
+read -p "Remove backup files? (y/N): " -n 1 -r || REPLY=""
+echo
+if [[ $REPLY =~ ^[Yy]$ ]]; then
+    find "$LLAMA_DIR/ggml/src/ggml-cuda" -name "*.backup" -delete 2>/dev/null || true
+    rm -f "$VENDOR_HIP_FILE.backup"
+    echo " ✓ Backup files removed"
+else
+    echo " ℹ Backup files kept for safety"
+fi
+echo ""
+echo "Done! Your llama.cpp checkout now supports rocWMMA builds."