Build matrix with llvm-build #105
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
name: Integration Tests | |
on: | |
workflow_dispatch: | |
pull_request: | |
branches: [main] | |
merge_group: | |
branches: [main] | |
types: [checks_requested] | |
concurrency: | |
group: ${{ github.ref }} | |
cancel-in-progress: ${{ github.ref != 'refs/heads/master' }} | |
env: | |
TRITON_USE_ASSERT_ENABLED_LLVM: "TRUE" | |
USE_LOCAL_LLVM: "" | |
# use specific LLVM | |
#USE_LOCAL_LLVM: "https://tritonlang.blob.core.windows.net/llvm-builds/llvm-5e5a22ca-windows-x64.tar.gz" | |
jobs: | |
Runner-Preparation: | |
runs-on: ubuntu-latest | |
timeout-minutes: 30 | |
outputs: | |
matrix-required: ${{ steps.set-matrix.outputs.matrix-required }} | |
matrix-optional: ${{ steps.set-matrix.outputs.matrix-optional }} | |
steps: | |
- name: Prepare matrix | |
id: set-matrix | |
run: | | |
if [ x"${{ github.repository }}" == x"openai/triton" ]; then | |
echo 'matrix-required={"self-hosted": ["true"], "runner": [["self-hosted", "A100"], ["self-hosted", "H100"]], "python-version": ["3.11"], "cuda-version": ["12.1"], "include":[{"cc": "clang", "cpp": "clang++"}]}' >> "$GITHUB_OUTPUT" | |
echo 'matrix-optional={"self-hosted": ["true"], "runner": [["self-hosted", "gfx908"], ["self-hosted", "arc770"]], "python-version": ["3.11"], "cuda-version": ["12.1"], "include":[{"cc": "clang", "cpp": "clang++"}]}' >> "$GITHUB_OUTPUT" | |
else | |
echo 'matrix-required={"runner":["ubuntu-latest", "windows-latest"], "self-hosted": ["false"], "python-version": ["3.10", "3.11"], "cuda-version": ["11.8"], "include":[{"runner": "ubuntu-latest", "cc": "clang", "cpp":"clang++"}, {"runner": "windows-latest", "cc": "cl", "cpp": "cl"}]}' >> "$GITHUB_OUTPUT" | |
echo 'matrix-optional={"runner":["ubuntu-latest", "windows-latest"], "self-hosted": ["false"], "python-version": ["3.10", "3.11"], "cuda-version": ["11.8"], "include":[{"runner": "ubuntu-latest", "cc": "clang", "cpp":"clang++"}, {"runner": "windows-latest", "cc": "cl", "cpp": "cl"}]}' >> "$GITHUB_OUTPUT" | |
fi | |
Integration-Tests: | |
needs: Runner-Preparation | |
runs-on: ${{ matrix.runner }} | |
timeout-minutes: 60 | |
strategy: | |
matrix: ${{fromJson(needs.Runner-Preparation.outputs.matrix-required)}} | |
steps: | |
- name: Checkout | |
uses: actions/checkout@v3 | |
with: | |
submodules: "true" | |
- name: Set CUDA ENV | |
if: ${{(matrix.self-hosted == 'true') && (matrix.runner[1] == 'V100' || matrix.runner[1] == 'A100' || matrix.runner[1] == 'H100')}} | |
run: | | |
echo "BACKEND=CUDA" >> "${GITHUB_ENV}" | |
echo "ENABLE_TMA=0" >> "${GITHUB_ENV}" | |
echo "TRITON_DISABLE_LINE_INFO=1" >> "${GITHUB_ENV}" | |
- name: Set up Python ${{ matrix.python-version }} | |
if: ${{(matrix.self-hosted != 'true')}} | |
uses: actions/setup-python@v3 | |
with: | |
python-version: ${{ matrix.python-version }} | |
- name: Set up MSVC | |
if: ${{(matrix.runner == 'windows-latest')}} | |
uses: ilammy/[email protected] | |
with: | |
arch: amd64 | |
- name: Setup Mambaforge (Windows) | |
if: ${{(matrix.runner == 'windows-latest')}} | |
uses: conda-incubator/setup-miniconda@v3 | |
with: | |
miniforge-variant: Mambaforge | |
miniforge-version: latest | |
activate-environment: triton-env | |
use-mamba: true | |
- uses: conda-incubator/setup-miniconda@v3 | |
if: ${{(matrix.runner == 'windows-latest')}} | |
with: | |
activate-environment: triton-env | |
environment-file: environment.yml | |
auto-activate-base: true | |
python-version: ${{ matrix.python-version }} | |
- name: set Environment Variables (Windows) | |
if: ${{(matrix.runner == 'windows-latest')}} | |
shell: bash | |
run: | | |
LLVM_SHORTHASH="$(cat cmake/llvm-hash.txt | cut -c1-8)" | |
# prepare LLVM install path installed by setup.py | |
echo "~/.triton/llvm/llvm-$LLVM_SHORTHASH-windows-x64/bin" >> "$GITHUB_PATH" | |
# compile with MSVC | |
echo "CC=cl" >> "${GITHUB_ENV}" | |
echo "CXX=cl" >> "${GITHUB_ENV}" | |
- name: Get Date (Windows) | |
if: ${{(matrix.runner == 'windows-latest')}} | |
id: get-date | |
run: echo "today=$(/bin/date -u '+%Y%m%d')" >> $GITHUB_OUTPUT | |
shell: bash | |
- name: Cache conda env (Windows) | |
if: ${{(matrix.runner == 'windows-latest')}} | |
id: cache | |
uses: actions/cache@v3 | |
env: | |
# Increase this value to reset cache if environment.yml has not changed | |
CACHE_NUMBER: 0 | |
with: | |
path: ${{ env.CONDA }}/envs | |
key: | |
${{ matrix.runner }}--${{ steps.get-date.outputs.today }}--conda-${{ env.CACHE_NUMBER }}-cp${{ matrix.python-version }}-${{ hashFiles('environment.yml') }} | |
- name: Update conda environment (Windows) | |
if: ${{(matrix.runner == 'windows-latest')}} | |
shell: bash | |
run: | | |
if [ "${{ steps.cache.outputs.cache-hit }}" != "true" ]; then | |
mamba env update -n triton-env -f environment.yml | |
cat environment.yml | |
fi | |
- name: Update environment | |
if: ${{(matrix.self-hosted != 'true')}} | |
shell: bash | |
run: | | |
echo "BACKEND=CUDA" >> "${GITHUB_ENV}" | |
echo "ENABLE_TMA=0" >> "${GITHUB_ENV}" | |
echo "TRITON_DISABLE_LINE_INFO=1" >> "${GITHUB_ENV}" | |
- name: Set reusable strings | |
# Turn repeated input strings (such as the build output directory) into step outputs. These step outputs can be used throughout the workflow file. | |
id: strings | |
shell: bash | |
run: | | |
echo "build-output-dir=${{ github.workspace }}/build" >> "$GITHUB_OUTPUT" | |
- name: Clear cache | |
shell: bash | |
run: | | |
rm -rf ~/.triton | |
- name: Update PATH | |
if: ${{(matrix.self-hosted == 'true')}} | |
run: | | |
echo "PATH=${HOME}/.local/bin:${PATH}" >> "${GITHUB_ENV}" | |
- name: Check pre-commit | |
run: | | |
python3 -m pip install --upgrade pre-commit | |
python3 -m pre_commit run --all-files --verbose | |
- name: Install Triton | |
if: ${{(matrix.runner != 'windows-latest')}} | |
run: | | |
cd python | |
python3 -m pip install --upgrade pip | |
python3 -m pip install cmake==3.24 ninja pytest-xdist wheel | |
sudo apt-get update -y | |
sudo apt-get install -y ccache clang lld | |
TRITON_BUILD_WITH_CLANG_LLD=true TRITON_BUILD_WITH_CCACHE=true python3 -m pip install --no-build-isolation -vvv '.[tests]' | |
if [ "${{ matrix.runner }}" = 'ubuntu-latest' ]; then | |
python3 setup.py bdist_wheel | |
fi | |
- name: Install LLVM (Windows) | |
if: ${{(matrix.runner == 'windows-latest') && env.USE_LOCAL_LLVM != ''}} | |
shell: bash | |
run: | | |
# use the official LLVM prebuilt | |
if [ "$USE_LOCAL_LLVM" = "OFFICIAL" ]; then | |
LLVM_SHORTHASH="$(cat cmake/llvm-hash.txt | cut -c1-8)" | |
curl -L -O https://tritonlang.blob.core.windows.net/llvm-builds/llvm-$LLVM_SHORTHASH-windows-x64.tar.gz | |
mkdir -p LLVM | |
tar xvf llvm-$LLVM_SHORTHASH-windows-x64.tar.gz --strip-components=1 -c LLVM | |
echo "LLVM_SYSPATH=${{ github.workspace }}\\LLVM" >> "$GITHUB_ENV" | |
# comment out the following line to compile with msvc | |
echo "${{github.workspace }}\\LLVM\\bin" >> "$GITHUB_PATH" | |
rm -f llvm-$LLVM_SHORTHASH-windows-x64.tar.gz | |
fi | |
if [ "$USE_LOCAL_LLVM" != "" ]; then | |
curl -L -O $USE_LOCAL_LLVM | |
mkdir -p LLVM | |
tarfile=$(basename $USE_LOCAL_LLVM) | |
tar xvf $tarfile --strip-components=1 -c LLVM | |
echo "LLVM_SYSPATH=${{ github.workspace }}\\LLVM" >> "$GITHUB_ENV" | |
# comment out the following line to compile with msvc | |
echo "${{github.workspace }}\\LLVM\\bin" >> "$GITHUB_PATH" | |
rm -f $tarfile | |
fi | |
- name: Install Triton (Windows) | |
if: ${{(matrix.runner == 'windows-latest')}} | |
run: | | |
cd python | |
python3 -m pip install --upgrade pip | |
python3 -m pip install cmake==3.24 ninja pytest-xdist wheel | |
python3 -m pip install --no-build-isolation -vvv . | |
python3 setup.py bdist_wheel | |
- name: Run lit tests | |
if: ${{(matrix.self-hosted == 'true') && env.BACKEND == 'CUDA'}} | |
run: | | |
python3 -m pip install lit | |
cd python | |
LIT_TEST_DIR="build/$(ls build | grep -i cmake)/test" | |
if [ ! -d "${LIT_TEST_DIR}" ]; then | |
echo "Coult not find '${LIT_TEST_DIR}'" ; exit -1 | |
fi | |
lit -v "${LIT_TEST_DIR}" | |
- name: Enable TMA | |
if: ${{(matrix.self-hosted == 'true') && (matrix.runner[1] == 'H100')}} | |
run: | | |
echo "ENABLE_TMA=1" >> "${GITHUB_ENV}" | |
- name: Run python tests on CUDA with ENABLE_TMA=1 | |
if: ${{(matrix.self-hosted == 'true') && env.BACKEND == 'CUDA' && env.ENABLE_TMA == '1'}} | |
run: | | |
cd python/test/unit | |
python3 -m pytest -n 8 --ignore=runtime --ignore=operators --ignore=language/test_line_info.py --ignore=language/test_subprocess.py | |
python3 -m pytest -n 8 language/test_subprocess.py | |
# run runtime tests serially to avoid race condition with cache handling. | |
python3 -m pytest runtime/ | |
# run test_line_info.py separately with TRITON_DISABLE_LINE_INFO=0 | |
TRITON_DISABLE_LINE_INFO=0 python3 -m pytest language/test_line_info.py | |
#run hopper/test_flashattention.py to avoid out of gpu memory | |
python3 -m pytest hopper/test_flashattention.py | |
- name: Run python tests on CUDA with ENABLE_TMA=0 | |
if: ${{(matrix.self-hosted == 'true') && env.BACKEND == 'CUDA' && env.ENABLE_TMA == '0'}} | |
run: | | |
cd python/test/unit | |
python3 -m pytest -n 8 --ignore=runtime --ignore=hopper --ignore=operators --ignore=language/test_line_info.py | |
# run runtime tests serially to avoid race condition with cache handling. | |
python3 -m pytest runtime/ | |
# run test_line_info.py separately with TRITON_DISABLE_LINE_INFO=0 | |
TRITON_DISABLE_LINE_INFO=0 python3 -m pytest language/test_line_info.py | |
- name: Clear cache | |
shell: bash | |
run: | | |
rm -rf ~/.triton | |
- name: Run interpreter tests | |
if: ${{(matrix.self-hosted == 'true')}} | |
env: | |
# TRITON_INTERPRET: "1" | |
CUA_VISIBLE_DEVICES: "" | |
run: | | |
cd python/test/unit | |
python3 -m pytest -vs operators/test_flash_attention.py | |
- name: Run partial tests on CUDA with ENABLE_TMA=1 | |
if: ${{(matrix.self-hosted == 'true') && env.BACKEND == 'CUDA' && env.ENABLE_TMA == '1'}} | |
run: | | |
cd python/test/unit | |
python3 -m pytest -n 8 operators | |
- name: Run partial tests on CUDA with ENABLE_TMA=0 | |
if: ${{(matrix.self-hosted == 'true') && env.BACKEND == 'CUDA' && env.ENABLE_TMA == '0'}} | |
run: | | |
cd python/test/unit | |
python3 -m pytest -n 8 operators | |
- name: Upload Build artifacts (Windows) | |
if: ${{(matrix.runner == 'windows-latest')}} | |
uses: actions/upload-artifact@v2 | |
with: | |
name: triton-dist-windows | |
path: | | |
${{ github.workspace }}/python/dist/ | |
- name: Upload Build artifacts (Ubuntu) | |
if: ${{(matrix.runner == 'ubuntu-latest')}} | |
uses: actions/upload-artifact@v2 | |
with: | |
name: triton-dist-ubuntu | |
path: | | |
${{ github.workspace }}/python/dist/ | |
- name: Create artifacts archive | |
if: ${{(matrix.self-hosted == 'true') && (matrix.runner[1] == 'V100' || matrix.runner[1] == 'A100' || matrix.runner[1] == 'H100')}} | |
run: | | |
cd ~/.triton | |
tar -czf artifacts.tar.gz cache | |
- name: Upload artifacts archive | |
if: ${{(matrix.self-hosted == 'true') && (matrix.runner[1] == 'V100' || matrix.runner[1] == 'A100' || matrix.runner[1] == 'H100')}} | |
uses: actions/upload-artifact@v2 | |
with: | |
name: artifacts ${{ matrix.runner[1] }} | |
path: ~/.triton/artifacts.tar.gz | |
- name: Run CXX unittests | |
if: ${{(matrix.self-hosted == 'true') && env.BACKEND == 'CUDA'}} | |
run: | | |
cd python | |
cd "build/$(ls build | grep -i cmake)" | |
ctest | |
- name: Regression tests | |
if: ${{ (matrix.self-hosted == 'true') && contains(matrix.runner, 'A100') }} | |
run: | | |
python3 -m pip install pytest-rerunfailures | |
cd python/test/regression | |
sudo nvidia-smi -i 0 -pm 1 | |
sudo nvidia-smi -i 0 --lock-gpu-clocks=1280,1280 | |
python3 -m pytest -vs . --reruns 10 | |
sudo nvidia-smi -i 0 -rgc | |
Compare-artifacts: | |
if: ${{(github.repository == 'openai/triton')}} | |
needs: Integration-Tests | |
timeout-minutes: 5 | |
runs-on: ubuntu-latest | |
steps: | |
- name: Checkout | |
uses: actions/checkout@v2 | |
- name: Install gh CLI | |
run: | | |
sudo apt-key adv --keyserver keyserver.ubuntu.com --recv-key 23F3D4EA75716059 | |
echo "deb [arch=$(dpkg --print-architecture)] https://cli.github.com/packages focal main" | sudo tee /etc/apt/sources.list.d/github-cli.list > /dev/null | |
sudo apt update | |
sudo apt install gh | |
- name: Save PR number to a file | |
env: | |
PR_NUMBER: ${{ github.event.number }} | |
run: | | |
echo $PR_NUMBER > pr_number | |
- name: Upload PR number to artifacts | |
uses: actions/upload-artifact@v3 | |
with: | |
name: pr_number | |
path: pr_number | |
- name: Download latest main artifacts | |
env: | |
ARTIFACT_NAME: artifacts A100 | |
ARTIFACT_JOB_NAME: Integration-Tests | |
MAX_NUM_ACTIONS_PAGES: 30 | |
GH_TOKEN: ${{ secrets.GITHUB_TOKEN }} | |
run: | | |
OWNER_REPO="${{ github.repository }}" | |
echo "OWNER_REPO: $OWNER_REPO" | |
PR_NUMBERS=($(gh api --method GET repos/$OWNER_REPO/pulls -f state=closed | jq -r ".[] | select(.merged_at != null) | .number")) | |
# Not all PRs go through integration tests | |
success=0 | |
for PR_NUMBER in "${PR_NUMBERS[@]}" | |
do | |
echo "Last merged PR number: $PR_NUMBER" | |
BRANCH_NAME=$(gh api repos/$OWNER_REPO/pulls/$PR_NUMBER --jq '.head.ref') | |
echo "BRANCH_NAME: $BRANCH_NAME" | |
USER_ID=$(gh api repos/$OWNER_REPO/pulls/$PR_NUMBER --jq '.user.id') | |
echo "USER_ID: $USER_ID" | |
run_id_found=false | |
page=1 | |
while true; do | |
if [ "$page" -gt $MAX_NUM_ACTIONS_PAGES ]; then | |
break | |
fi | |
run_id=$(gh api --method GET "repos/$OWNER_REPO/actions/runs?page=$page&per_page=100" | jq --arg branch_name "$BRANCH_NAME" --arg run_name "Integration Tests" --arg user_id "$USER_ID" '.workflow_runs[] | select(.head_branch == $branch_name and .name == $run_name and .actor.id == ($user_id | tonumber))' | jq '.id' | head -1) | |
if [ "$run_id" != "" ]; then | |
echo "First run ID on branch $BRANCH_NAME is: $run_id" | |
WORKFLOW_RUN_ID=$run_id | |
run_id_found=true | |
break | |
fi | |
((page++)) | |
done | |
if ! $run_id_found; then | |
echo "No run_id found for PR ${PR_NUMBER}, moving to the next PR." | |
continue | |
fi | |
echo "WORKFLOW_RUN_ID: $WORKFLOW_RUN_ID" | |
ARTIFACT_URL=$(gh api repos/$OWNER_REPO/actions/runs/$WORKFLOW_RUN_ID/artifacts | jq --arg artifact_name "$ARTIFACT_NAME" '.artifacts[] | select(.name == $artifact_name).archive_download_url' --raw-output) | |
echo "ARTIFACT_URL: $ARTIFACT_URL" | |
if [ -n "$ARTIFACT_URL" ]; then | |
echo "Downloading artifact: $ARTIFACT_URL" | |
curl --location --remote-header-name -H "Authorization: token $GH_TOKEN" -o reference.zip "$ARTIFACT_URL" | |
# Print the size of the downloaded artifact | |
echo "Artifact size (stat): $(stat --printf="%s bytes" reference.zip)" | |
echo "Artifact size (du): $(du -sh reference.zip)" | |
unzip reference.zip | |
tar -xzf artifacts.tar.gz | |
rm reference.zip | |
rm artifacts.tar.gz | |
mv cache reference | |
success=1 | |
break | |
fi | |
done | |
if [ $success -eq 0 ]; then | |
echo "No artifact found with the name: $ARTIFACT_NAME" | |
exit 1 | |
fi | |
- name: Download current job artifacts | |
uses: actions/download-artifact@v2 | |
with: | |
name: artifacts A100 | |
- name: Unzip current job artifacts | |
run: | | |
# Print the size of the downloaded artifact | |
echo "Artifact size (stat): $(stat --printf="%s bytes" artifacts.tar.gz)" | |
echo "Artifact size (du): $(du -sh artifacts.tar.gz)" | |
tar -xzf artifacts.tar.gz | |
rm artifacts.tar.gz | |
mv cache current | |
- name: Compare artifacts | |
run: | | |
set +e | |
python3 python/test/tools/compare_files.py --path1 reference --path2 current | |
exit_code=$? | |
set -e | |
echo $exit_code | |
if [ $exit_code -eq 0 ]; then | |
echo "Artifacts are identical" | |
echo "COMPARISON_RESULT=true" >> $GITHUB_ENV | |
elif [ $exit_code -eq 1 ]; then | |
echo "Artifacts are different" | |
echo "COMPARISON_RESULT=false" >> $GITHUB_ENV | |
else | |
echo "Error while comparing artifacts" | |
echo "COMPARISON_RESULT=error" >> $GITHUB_ENV | |
fi | |
- name: Check comparison result and write to file | |
run: | | |
if [ "${{ env.COMPARISON_RESULT }}" = "true" ]; then | |
echo "SUCCESS" > comparison_result | |
else | |
echo "FAILED" > comparison_result | |
fi | |
- name: Upload comparison result to artifacts | |
uses: actions/upload-artifact@v3 | |
with: | |
name: comparison_result | |
path: comparison_result | |
- name: Upload results as artifact | |
uses: actions/upload-artifact@v2 | |
with: | |
name: kernels-reference-check | |
path: kernels_reference_check.txt |