Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
117 changes: 115 additions & 2 deletions .github/workflows/integration-tests.yml
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@ jobs:
outputs:
matrix-CUDA: ${{ steps.set-matrix.outputs.matrix-CUDA }}
matrix-HIP: ${{ steps.set-matrix.outputs.matrix-HIP }}
matrix-MACOS: ${{ steps.set-matrix.outputs.matrix-MACOS }}
steps:
- name: Decide pre-submit integration test enablement
# Always enable integration tests for pre-submit pull requests.
Expand Down Expand Up @@ -106,9 +107,11 @@ jobs:
# Publish the runner matrices as step outputs. Outputs are appended to
# $GITHUB_OUTPUT as `name=value` lines, matching the other steps in this
# workflow; the older `::set-output` workflow command is deprecated.
if [ x"${{ github.repository }}" == x"triton-lang/triton" ]; then
  # Upstream repository: CUDA and HIP jobs target self-hosted GPU runners.
  echo 'matrix-CUDA=[["self-hosted", "A100"], ["self-hosted", "H100"]]' >> "$GITHUB_OUTPUT"
  echo 'matrix-HIP=[["self-hosted", "gfx90a"]]' >> "$GITHUB_OUTPUT"
  echo 'matrix-MACOS=[["macos-latest"]]' >> "$GITHUB_OUTPUT"
else
  # Any other repository (e.g. a fork): use GitHub-hosted runners only.
  echo 'matrix-CUDA=["ubuntu-latest"]' >> "$GITHUB_OUTPUT"
  echo 'matrix-HIP=["ubuntu-latest"]' >> "$GITHUB_OUTPUT"
  echo 'matrix-MACOS=[["macos-latest"]]' >> "$GITHUB_OUTPUT"
fi
pre-commit:
name: pre-commit (code formatting)
Expand Down Expand Up @@ -165,6 +168,7 @@ jobs:
echo "llvm=$(cat cmake/llvm-hash.txt | cut -c 1-8)" >> $GITHUB_OUTPUT
echo "pybind11=$(cat cmake/pybind11-version.txt)" >> $GITHUB_OUTPUT
echo "nvidia=$(cat cmake/nvidia-toolchain-version.txt)" >> $GITHUB_OUTPUT
echo "json=$(cat cmake/json-version.txt)" >> $GITHUB_OUTPUT
echo "datetime=$(date -u -Iseconds)" >> $GITHUB_OUTPUT
shell: bash
- name: Cache build dependencies
Expand All @@ -176,7 +180,8 @@ jobs:
~/.triton/llvm
~/.triton/nvidia
~/.triton/pybind11
key: ${{ runner.os }}-${{ runner.arch }}-llvm-${{ steps.cache-key.outputs.llvm }}-nvidia-${{ steps.cache-key.outputs.nvidia }}-pybind11-${{ steps.cache-key.outputs.pybind11 }}
~/.triton/json
key: ${{ runner.os }}-${{ runner.arch }}-llvm-${{ steps.cache-key.outputs.llvm }}-nvidia-${{ steps.cache-key.outputs.nvidia }}-pybind11-${{ steps.cache-key.outputs.pybind11 }}-json-${{ steps.cache-key.outputs.json }}
- # Cache ~/.triton/cache because the vast majority of unit test time is
# spent compiling. Triton won't (well, should not) use these cached files
# if something internal to Triton changes, because Triton's internal
Expand Down Expand Up @@ -301,6 +306,7 @@ jobs:
echo "llvm=$(cat cmake/llvm-hash.txt | cut -c 1-8)" >> $GITHUB_OUTPUT
echo "pybind11=$(cat cmake/pybind11-version.txt)" >> $GITHUB_OUTPUT
echo "nvidia=$(cat cmake/nvidia-toolchain-version.txt)" >> $GITHUB_OUTPUT
echo "json=$(cat cmake/json-version.txt)" >> $GITHUB_OUTPUT
echo "datetime=$(date -u -Iseconds)" >> $GITHUB_OUTPUT
shell: bash
- name: Cache build dependencies
Expand All @@ -312,7 +318,8 @@ jobs:
~/.triton/llvm
~/.triton/nvidia
~/.triton/pybind11
key: ${{ runner.os }}-${{ runner.arch }}-llvm-${{ steps.cache-key.outputs.llvm }}-nvidia-${{ steps.cache-key.outputs.nvidia }}-pybind11-${{ steps.cache-key.outputs.pybind11 }}
~/.triton/json
key: ${{ runner.os }}-${{ runner.arch }}-llvm-${{ steps.cache-key.outputs.llvm }}-nvidia-${{ steps.cache-key.outputs.nvidia }}-pybind11-${{ steps.cache-key.outputs.pybind11 }}-json-${{ steps.cache-key.outputs.json }}
- # Cache ~/.triton/cache because the vast majority of unit test time is
# spent compiling. Triton won't (well, should not) use these cached files
# if something internal to Triton changes, because Triton's internal
Expand Down Expand Up @@ -398,6 +405,112 @@ jobs:
ls -alh ~/.triton
du -sh ~/.triton/**

mkdir -p ~/.cache/ccache
ls -alh ~/.cache/ccache
du -sh ~/.cache/ccache
Build-Tests:
  # Build-only job: installs Triton from source on each runner listed in the
  # `matrix-MACOS` output of Runner-Preparation. No test suite is run here;
  # the job passes when `pip install` of the python package succeeds.
  needs: Runner-Preparation
  if: needs.Runner-Preparation.outputs.matrix-MACOS != ''
  runs-on: ${{ matrix.runner }}
  timeout-minutes: 30
  strategy:
    matrix:
      runner: ${{fromJson(needs.Runner-Preparation.outputs.matrix-MACOS)}}
  steps:
    - name: Checkout
      uses: actions/checkout@v4
      with:
        submodules: "true"
    - name: Install brew dependencies
      run: |
        brew update
        brew install ccache llvm
    # Derive cache-key components from the pinned dependency versions in
    # cmake/, plus a timestamp that makes the artifact cache key unique.
    - name: Compute cache keys
      id: cache-key
      run: |
        echo "llvm=$(cat cmake/llvm-hash.txt | cut -c 1-8)" >> $GITHUB_OUTPUT
        echo "pybind11=$(cat cmake/pybind11-version.txt)" >> $GITHUB_OUTPUT
        echo "nvidia=$(cat cmake/nvidia-toolchain-version.txt)" >> $GITHUB_OUTPUT
        echo "json=$(cat cmake/json-version.txt)" >> $GITHUB_OUTPUT
        echo "datetime=$(date -u -Iseconds)" >> $GITHUB_OUTPUT
      shell: bash
    - name: Cache build dependencies
      uses: actions/cache@v4
      with:
        # Note that we cannot use environment variables here given there is
        # no shell to interpret them in the paths.
        path: |
          ~/.triton/llvm
          ~/.triton/nvidia
          ~/.triton/pybind11
          ~/.triton/json
        key: ${{ runner.os }}-${{ runner.arch }}-llvm-${{ steps.cache-key.outputs.llvm }}-nvidia-${{ steps.cache-key.outputs.nvidia }}-pybind11-${{ steps.cache-key.outputs.pybind11 }}-json-${{ steps.cache-key.outputs.json }}
    # Cache ~/.triton/cache because the vast majority of unit test time is
    # spent compiling. Triton won't (well, should not) use these cached files
    # if something internal to Triton changes, because Triton's internal
    # source code is part of the cache key.
    #
    # Similarly, cache ~/.cache/ccache to speed up compilation.
    #
    # On branch `main` we always start from an empty cache, i.e. we skip the
    # "restore" step. This is to prevent the caches from accumulating stale
    # files over time.
    - name: Restore cache of ccache and Triton compilation artifacts
      if: github.event_name != 'push'
      uses: actions/cache/restore@v4
      with:
        path: |
          ~/.triton/cache
          ~/.cache/ccache
        # Restore the most recent cache entry.
        restore-keys: triton-artifacts-${{ runner.os }}-${{ runner.arch }}-${{ runner.name }}-llvm-${{ steps.cache-key.outputs.llvm }}-
        # We expect this cache key never to hit and for us to fall back
        # unconditionally to the restore-key, so it doesn't actually matter
        # what we put here (so long as it doesn't hit an existing key).
        key: triton-artifacts-${{ runner.os }}-${{ runner.arch }}-${{ runner.name }}-llvm-${{ steps.cache-key.outputs.llvm }}-${{ steps.cache-key.outputs.datetime }}
    - name: Inspect cache directory
      run: |
        mkdir -p ~/.triton
        ls -alh ~/.triton
    # Make user-local binaries and the Homebrew LLVM tools visible to the
    # steps that follow.
    - name: Update PATH
      run: |
        echo "$HOME/.local/bin" >> $GITHUB_PATH
        echo "/opt/homebrew/opt/llvm/bin" >> $GITHUB_PATH
    - name: Install pip dependencies
      run: |
        python3 -m venv ~/.venv
        source ~/.venv/bin/activate
        python3 -m pip install --upgrade pip
        python3 -m pip install cython setuptools wheel cmake==3.24 ninja pytest-xdist lit
    - name: Install Triton
      env:
        TRITON_BUILD_WITH_CCACHE: "true"
        TRITON_BUILD_WITH_O1: "true"
        # macos-latest has 3 vCPUs and 7 GB of DRAM; to save memory, we limit
        # the number of parallel build jobs to 3.
        # https://docs.github.com/en/actions/using-github-hosted-runners/about-github-hosted-runners/about-github-hosted-runners#standard-github-hosted-runners-for-public-repositories
        MAX_JOBS: 3
      run: |
        source ~/.venv/bin/activate
        echo "PATH is '$PATH'"
        cd python
        python3 -m pip install --no-build-isolation .
    # If we're on branch `main`, save the ccache Triton compilation artifacts
    # to the cache so they can be used by other (non-main) CI runs.
    #
    # (It wouldn't be a problem to save the cache on every run, because github
    # evicts cache entries LRU, but maybe this saves a bit of time in CI.)
    - name: Save ccache and Triton compilation artifacts to cache
      if: github.ref == 'refs/heads/main'
      uses: actions/cache/save@v4
      with:
        # One path per line: actions/cache splits `path` on newlines, so a
        # space-separated single line would be read as one path containing a
        # space and neither directory would be saved. This list must match
        # the paths used by the restore step above.
        path: |
          ~/.triton/cache
          ~/.cache/ccache
        key: triton-artifacts-${{ runner.os }}-${{ runner.arch }}-${{ runner.name }}-llvm-${{ steps.cache-key.outputs.llvm }}-${{ steps.cache-key.outputs.datetime }}
    - name: Inspect cache directories
      run: |
        mkdir -p ~/.triton
        ls -alh ~/.triton
        du -sh ~/.triton/**

        mkdir -p ~/.cache/ccache
        ls -alh ~/.cache/ccache
        du -sh ~/.cache/ccache
57 changes: 56 additions & 1 deletion .github/workflows/integration-tests.yml.in
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@ jobs:
outputs:
matrix-CUDA: ${{ steps.set-matrix.outputs.matrix-CUDA }}
matrix-HIP: ${{ steps.set-matrix.outputs.matrix-HIP }}
matrix-MACOS: ${{ steps.set-matrix.outputs.matrix-MACOS }}
steps:
- name: Decide pre-submit integration test enablement
# Always enable integration tests for pre-submit pull requests.
Expand Down Expand Up @@ -114,9 +115,11 @@ jobs:
# Publish the runner matrices as step outputs. Outputs are appended to
# $GITHUB_OUTPUT as `name=value` lines, matching the other steps in this
# workflow; the older `::set-output` workflow command is deprecated.
if [ x"${{ github.repository }}" == x"triton-lang/triton" ]; then
  # Upstream repository: CUDA and HIP jobs target self-hosted GPU runners.
  echo 'matrix-CUDA=[["self-hosted", "A100"], ["self-hosted", "H100"]]' >> "$GITHUB_OUTPUT"
  echo 'matrix-HIP=[["self-hosted", "gfx90a"]]' >> "$GITHUB_OUTPUT"
  echo 'matrix-MACOS=[["macos-latest"]]' >> "$GITHUB_OUTPUT"
else
  # Any other repository (e.g. a fork): use GitHub-hosted runners only.
  echo 'matrix-CUDA=["ubuntu-latest"]' >> "$GITHUB_OUTPUT"
  echo 'matrix-HIP=["ubuntu-latest"]' >> "$GITHUB_OUTPUT"
  echo 'matrix-MACOS=[["macos-latest"]]' >> "$GITHUB_OUTPUT"
fi

pre-commit:
Expand Down Expand Up @@ -162,6 +165,7 @@ jobs:
run: |
git diff


Integration-Tests:
needs: Runner-Preparation
if: needs.Runner-Preparation.outputs.matrix-CUDA != ''
Expand All @@ -186,6 +190,7 @@ jobs:
echo "llvm=$(cat cmake/llvm-hash.txt | cut -c 1-8)" >> $GITHUB_OUTPUT
echo "pybind11=$(cat cmake/pybind11-version.txt)" >> $GITHUB_OUTPUT
echo "nvidia=$(cat cmake/nvidia-toolchain-version.txt)" >> $GITHUB_OUTPUT
echo "json=$(cat cmake/json-version.txt)" >> $GITHUB_OUTPUT
echo "datetime=$(date -u -Iseconds)" >> $GITHUB_OUTPUT
shell: bash

Expand All @@ -199,7 +204,8 @@ jobs:
~/.triton/llvm
~/.triton/nvidia
~/.triton/pybind11
key: ${{ runner.os }}-${{ runner.arch }}-llvm-${{ steps.cache-key.outputs.llvm }}-nvidia-${{ steps.cache-key.outputs.nvidia }}-pybind11-${{ steps.cache-key.outputs.pybind11 }}
~/.triton/json
key: ${{ runner.os }}-${{ runner.arch }}-llvm-${{ steps.cache-key.outputs.llvm }}-nvidia-${{ steps.cache-key.outputs.nvidia }}-pybind11-${{ steps.cache-key.outputs.pybind11 }}-json-${{ steps.cache-key.outputs.json }}

# Cache ~/.triton/cache because the vast majority of unit test time is
# spent compiling. Triton won't (well, should not) use these cached files
Expand Down Expand Up @@ -384,3 +390,52 @@ jobs:
- *run-cpp-unittests-step
- *save-build-artifacts-step
- *inspect-cache-directories-step

# Build-only job: installs Triton from source on each runner listed in the
# `matrix-MACOS` output of Runner-Preparation. No test step is listed here.
Build-Tests:
needs: Runner-Preparation
# Skip the job when Runner-Preparation published no macOS matrix.
if: needs.Runner-Preparation.outputs.matrix-MACOS != ''
runs-on: ${{ matrix.runner }}
timeout-minutes: 30
strategy:
matrix:
runner: ${{fromJson(needs.Runner-Preparation.outputs.matrix-MACOS)}}
steps:
- name: Checkout
uses: actions/checkout@v4
with:
submodules: "true"
- name: Install brew dependencies
run: |
brew update
brew install ccache llvm

# The aliases below reuse step definitions through YAML anchors; the anchors
# are presumably declared earlier in this template (not visible here).
- *compute-cache-keys-step
- *cache-build-dependencies-step
- *restore-build-artifacts-step
- *inspect-cache-directory-step

# Append user-local binaries and the Homebrew LLVM bin directory to
# $GITHUB_PATH.
- name: Update PATH
run: |
echo "$HOME/.local/bin" >> $GITHUB_PATH
echo "/opt/homebrew/opt/llvm/bin" >> $GITHUB_PATH
# Build tooling is installed into a virtualenv at ~/.venv, which the
# "Install Triton" step activates again before building.
- name: Install pip dependencies
run: |
python3 -m venv ~/.venv
source ~/.venv/bin/activate
python3 -m pip install --upgrade pip
python3 -m pip install cython setuptools wheel cmake==3.24 ninja pytest-xdist lit
- name: Install Triton
env:
TRITON_BUILD_WITH_CCACHE: "true"
TRITON_BUILD_WITH_O1: "true"
# macos-latest has 3 vCPUs and 7 GB of DRAM; to save memory, we limit the
# number of parallel build jobs to 3.
# https://docs.github.com/en/actions/using-github-hosted-runners/about-github-hosted-runners/about-github-hosted-runners#standard-github-hosted-runners-for-public-repositories
MAX_JOBS: 3
run: |
source ~/.venv/bin/activate
echo "PATH is '$PATH'"
cd python
python3 -m pip install --no-build-isolation .

# NOTE(review): confirm that the anchor behind `save-build-artifacts-step`
# lists its cache paths one per line; actions/cache splits `path` on newlines.
- *save-build-artifacts-step
- *inspect-cache-directories-step
4 changes: 3 additions & 1 deletion CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,8 @@ set(TRITON_CODEGEN_BACKENDS "" CACHE STRING "Enable different codegen backends")
# Customized release build type with assertions: TritonRelBuildWithAsserts
set(CMAKE_C_FLAGS_TRITONRELBUILDWITHASSERTS "-O2 -g")
set(CMAKE_CXX_FLAGS_TRITONRELBUILDWITHASSERTS "-O2 -g")
set(CMAKE_C_FLAGS_TRITONBUILDWITHO1 "-O1")
set(CMAKE_CXX_FLAGS_TRITONBUILDWITHO1 "-O1")

# Default build type
if(NOT CMAKE_BUILD_TYPE)
Expand Down Expand Up @@ -265,7 +267,7 @@ if(TRITON_BUILD_PYTHON_MODULE AND NOT WIN32)

# Check if the platform is MacOS
if(APPLE)
set(PYTHON_LDFLAGS "-undefined dynamic_lookup -flto")
set(PYTHON_LDFLAGS "-undefined dynamic_lookup")
endif()

target_link_libraries(triton PRIVATE ${PYTHON_LDFLAGS})
Expand Down
1 change: 1 addition & 0 deletions cmake/json-version.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
v3.11.3
2 changes: 1 addition & 1 deletion cmake/llvm-hash.txt
Original file line number Diff line number Diff line change
@@ -1 +1 @@
1e5f29af81a5f6fda308074f6345b9fba4faa71c
10dc3a8e916d73291269e5e2b82dd22681489aa1
Loading