maryamtahhan · pull · Apr 14, 2026 · Apr 14, 2026 · Apr 15, 2026 · Apr 15, 2026
diff --git a/.flake8 b/.flake8
diff --git a/.github/CODEOWNERS b/.github/CODEOWNERS
@@ -46,3 +46,20 @@ lib/Dialect/TritonGPU/Transforms/TritonGPUConversion.cpp @ptillet
 # third_party
 # -----------
 third_party/amd/ @antiagainst @zhanglx13
+third_party/proton/ @Jokeren @crobeck @fywkevin
+
+# -----------
+# gluon
+# -----------
+python/triton/experimental/gluon/ @peterbell10
+python/src/gluon_ir.cc @peterbell10
+python/test/gluon @peterbell10
+test/Gluon @peterbell10
+include/triton/Dialect/Gluon @peterbell10
+lib/Dialect/Gluon @peterbell10
+
+# -----------
+# Linear Layouts
+# -----------
+lib/Tools/ @lezcano
+lib/Dialect/TritonGPU/IR/LinearLayoutConversions.cpp @lezcano
diff --git a/.github/ISSUE_TEMPLATE/bug.yml b/.github/ISSUE_TEMPLATE/bug.yml
@@ -0,0 +1,48 @@
+name: Report a bug
+description: Report triton failing to compile a kernel, or giving incorrect results
+labels: ["bug"]
+
+body:
+- type: markdown
+  attributes:
+    value: |
+      #### Disclaimer
+      The core triton team is small and has very limited capacity. We may not have time to look into your report.
+      For the best results, please:
+        - Avoid submitting duplicates. Search through [the existing and past issues](https://github.com/triton-lang/triton/issues?q=is%3Aissue+sort%3Acreated-desc+) first to see if it's been reported previously.
+        - Check if the issue persists with a build from the latest source.
+        - Provide all relevant information in the initial report, to prevent unnecessary back and forth discussion.
+        - If you can, try to diagnose and/or fix the issue yourself. We welcome high quality contributions.
+- type: textarea
+  attributes:
+    label: Describe the bug
+    description: |
+      Please provide a clear and concise description of what the bug is.
+
+      If relevant, add a [minimal complete example](https://stackoverflow.com/help/minimal-reproducible-example) that reproduces the bug. It is very important for the snippet to be as simple as possible, so please take time to trim down any irrelevant code to help us debug efficiently. We are going to copy-paste your code and we expect to get the same result as you did, so include both the kernel and launching code as well as any relevant imports.
+
+      If the code is too long (hopefully, it isn't), feel free to put it in a public gist and link it in the issue: https://gist.github.com.
+
+      Please also paste or describe the results you observe instead of the expected results. If you observe an error, please paste the error message including the **full** traceback of the exception. It may be relevant to wrap error messages in ```` ```triple quotes blocks``` ````.
+    placeholder: |
+      A clear and concise description of what the bug is.
+
+      ```python
+      # Sample code to reproduce the problem
+      ```
+
+      ```
+      The error message you got, with the full traceback.
+      ```
+  validations:
+    required: true
+- type: textarea
+  attributes:
+    label: Environment details
+    description: |
+      Please include any relevant context about how you're running the reproducer e.g. which version of triton, and what GPU you are using.
+    placeholder: |
+        Triton: ...
+        GPU: ...
+  validations:
+    required: true
diff --git a/.github/ISSUE_TEMPLATE/config.yml b/.github/ISSUE_TEMPLATE/config.yml
@@ -0,0 +1,5 @@
+blank_issues_enabled: true
+contact_links:
+  - name: Community help
+    url: https://discord.gg/gpumode
+    about: GPU-mode discord community has a triton channel which is a great resource for help writing/learning triton
diff --git a/.github/ISSUE_TEMPLATE/performance.yml b/.github/ISSUE_TEMPLATE/performance.yml
@@ -0,0 +1,44 @@
+name: Report a performance issue
+description: Report cases where triton is generating sub-optimal (but functionally correct) PTX/LLVM IR
+labels: ["performance"]
+
+body:
+- type: markdown
+  attributes:
+    value: |
+      #### Disclaimer
+      The core triton team is small and has very limited capacity. We may not have time to look into your report.
+      For the best results, please:
+        - Avoid submitting duplicates. Search through [the existing and past issues](https://github.com/triton-lang/triton/issues?q=is%3Aissue+sort%3Acreated-desc+) first to see if it's been reported previously.
+        - Check if the issue persists with a build from the latest source.
+        - Provide all relevant information in the initial report, to prevent unnecessary back and forth discussion.
+        - If you can, try to diagnose and/or fix the issue yourself. We welcome high quality contributions.
+- type: textarea
+  attributes:
+    label: Describe the issue
+    description: |
+      Please provide a clear and concise description of the issue.
+
+      Include a [minimal complete example](https://stackoverflow.com/help/minimal-reproducible-example) that reproduces the issue. It is very important for the snippet to be as simple as possible, so please take time to trim down any irrelevant code to help us debug efficiently. We are going to copy-paste your code and we expect to get the same result as you did.
+
+      A reproducer could be a python program that runs a triton kernel and prints out the relevant suboptimal IR, or an IR file with an accompanying triton-opt command.
+
+      If the code is too long (hopefully, it isn't), feel free to put it in a public gist and link it in the issue: https://gist.github.com.
+    placeholder: |
+      A clear and concise description of the issue.
+
+      ```python
+      # Sample code to reproduce the problem
+      ```
+  validations:
+    required: true
+- type: textarea
+  attributes:
+    label: Environment details
+    description: |
+      Please include any relevant context about how you're running the reproducer e.g. which version of triton, and what GPU you are using.
+    placeholder: |
+        Triton: ...
+        GPU: ...
+  validations:
+    required: true
diff --git a/.github/PULL_REQUEST_TEMPLATE.md b/.github/PULL_REQUEST_TEMPLATE.md
@@ -1,11 +1,14 @@
+<!---
 The core Triton is a small number of people, and we receive many PRs (thank
 you!).  To help us review your code more quickly, **if you are a new
 contributor (less than 3 PRs merged) we ask that you complete the following
 tasks and include the filled-out checklist in your PR description.**
 
 Complete the following tasks before sending your PR, and replace `[ ]` with
 `[x]` to indicate you have done them.
+-->
 
+# New contributor declaration
 - [ ] I am not making a trivial change, such as fixing a typo in a comment.
 
 - [ ] I have written a PR description following these

diff --git a/.github/workflows/build-macos.yml b/.github/workflows/build-macos.yml
@@ -0,0 +1,135 @@
+name: Build MacOS
+
+on:
+  workflow_call:
+    inputs:
+      matrix:
+        required: true
+        type: string
+
+jobs:
+  build-macos:
+    runs-on: ${{ matrix.runner }}
+    strategy:
+      matrix:
+        runner: ${{ fromJson(inputs.matrix) }}
+    timeout-minutes: 60
+    env:
+      RUNNER_TYPE: ${{ matrix.runner[0] }}
+      TRITON_BUILD_WITH_CLANG_LLD: "TRUE"
+    name: Build MacOS
+    steps:
+      - name: Checkout
+        uses: actions/checkout@v6
+        with:
+          submodules: "true"
+      - name: Install brew dependencies
+        run: |
+          brew update
+          brew install ccache llvm@19 lld coreutils
+      - name: Compute cache keys
+        id: cache-key
+        run: |
+          llvm_file="cmake/llvm-hash.txt"
+          nvidia_file="cmake/nvidia-toolchain-version.json"
+          json_file="cmake/json-version.txt"
+
+          # Fail early if any pin file is absent; every cache key below is derived from them.
+          if [[ ! -f "$llvm_file" || ! -f "$nvidia_file" || ! -f "$json_file" ]]; then
+            echo "Error: Required dependency files are missing." >&2
+            exit 1
+          fi
+
+          # Step outputs: first 8 chars of the LLVM hash file, SHA-256 of the NVIDIA file, JSON version, UTC timestamp.
+          echo "llvm=$(cut -c 1-8 "$llvm_file")" >> "$GITHUB_OUTPUT"
+          echo "nvidia=$(sha256sum "$nvidia_file" | cut -d ' ' -f 1)" >> "$GITHUB_OUTPUT"
+          echo "json=$(cat "$json_file")" >> "$GITHUB_OUTPUT"
+          echo "datetime=$(date -u -Iseconds)" >> "$GITHUB_OUTPUT"
+        shell: bash
+      - name: Cache build dependencies
+        uses: actions/cache@v4
+        with:
+          # Note that we cannot use environment variables here given there is
+          # no shell to interpret them in the paths.
+          path: |
+            ~/.triton/llvm
+            ~/.triton/nvidia
+            ~/.triton/json
+          key: ${{ runner.os }}-${{ runner.arch }}-llvm-${{ steps.cache-key.outputs.llvm }}-nvidia-${{ steps.cache-key.outputs.nvidia }}-json-${{ steps.cache-key.outputs.json }}
+      - # Cache ~/.ccache to speed up compilation.
+        #
+        # On branch `main` we always start from an empty cache, i.e. we skip the
+        # "restore" step.  This is to prevent the caches from accumulating stale
+        # files over time.
+        name: Restore cache of ccache and Triton compilation artifacts
+        id: restore-build-cache
+        if: github.ref != 'refs/heads/main'
+        uses: actions/cache/restore@v4
+        with:
+          path: |
+            ~/.ccache
+          # Restore the most recent cache entry.
+          restore-keys: |
+            triton-artifacts-${{ runner.os }}-${{ runner.arch }}-${{ env.RUNNER_TYPE }}-llvm-${{ steps.cache-key.outputs.llvm }}-
+            triton-artifacts-${{ runner.os }}-${{ runner.arch }}-${{ env.RUNNER_TYPE }}-
+          # We expect this cache key never to hit and for us to fall back
+          # unconditionally to the restore-key, so it doesn't actually matter
+          # what we put here (so long as it doesn't hit an existing key).
+          key: triton-artifacts-${{ runner.os }}-${{ runner.arch }}-${{ env.RUNNER_TYPE }}-llvm-${{ steps.cache-key.outputs.llvm }}-${{ steps.cache-key.outputs.datetime }}
+      - name: Inspect cache directories
+        run: |
+          mkdir -p ~/.triton
+          du -h -d 1 ~/.triton
+
+          mkdir -p ~/.ccache
+          du -h -d 1 ~/.ccache
+      - name: Update PATH
+        run: |
+          echo "$HOME/.local/bin" >> $GITHUB_PATH
+          echo "/opt/homebrew/opt/llvm/bin" >> $GITHUB_PATH
+      - name: Create venv
+        run: |
+          python3 -m venv ~/.venv
+          source ~/.venv/bin/activate
+          python3 -m pip install --upgrade pip
+      - name: Install Triton
+        env:
+          TRITON_BUILD_WITH_O1: "true"
+          # macos-latest has 3 vcpus and 7GB DRAM, to save memory we limit the number of jobs to 3
+          # https://docs.github.com/en/actions/reference/github-hosted-runners-reference#standard-github-hosted-runners-for-public-repositories
+          MAX_JOBS: 3
+          # Add elapsed time in seconds to ninja status to monitor where build stalls
+          NINJA_STATUS: "[%f/%t, %es elapsed] "
+        run: |
+          source ~/.venv/bin/activate
+          echo "PATH is '$PATH'"
+          ccache --zero-stats
+          export PATH="/opt/homebrew/opt/llvm@19/bin:$PATH"
+          export CC="/opt/homebrew/opt/llvm@19/bin/clang"
+          export CXX="/opt/homebrew/opt/llvm@19/bin/clang++"
+          export CXXFLAGS="-stdlib=libc++"
+          export LDFLAGS="-L/opt/homebrew/opt/llvm@19/lib"
+          which clang++
+          clang++ --version
+          make dev-install
+      - name: CCache Stats
+        run: ccache --print-stats
+      - name: Inspect cache directories
+        run: |
+          mkdir -p ~/.triton
+          du -h -d 1 ~/.triton
+
+          mkdir -p ~/.ccache
+          du -h -d 1 ~/.ccache
+      - # If we're on branch `main`, save the ccache Triton compilation artifacts
+        # to the cache so they can be used by other (non-main) CI runs.
+        #
+        # (It wouldn't be a problem to save the cache on every run, because github
+        # evicts cache entries LRU, but maybe this saves a bit of time in CI.)
+        name: Save ccache and Triton compilation artifacts to cache
+        if: github.ref == 'refs/heads/main'
+        uses: actions/cache/save@v4
+        with:
+          path: |
+            ~/.ccache
+          key: triton-artifacts-${{ runner.os }}-${{ runner.arch }}-${{ env.RUNNER_TYPE }}-llvm-${{ steps.cache-key.outputs.llvm }}-${{ steps.cache-key.outputs.datetime }}
diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
@@ -0,0 +1,43 @@
+name: Integration Tests
+on:
+  workflow_dispatch:
+  pull_request:
+    branches-ignore: ['llvm-**']
+  merge_group:
+    branches: [main, 'dev-**']
+    types: [checks_requested]
+  push:
+    branches: [main]
+concurrency:  # group is scoped per workflow so other workflows on the same ref are never cancelled by this one
+  group: ${{ github.workflow }}-${{ github.ref }}
+  cancel-in-progress: ${{ github.ref != 'refs/heads/main' }}
+permissions: read-all
+
+jobs:
+
+  runner-preparation:
+    uses: ./.github/workflows/runner-preparation.yml
+
+  pre-commit:
+    uses: ./.github/workflows/pre-commit.yml
+
+  integration-tests-nvidia:
+    needs: runner-preparation
+    if: needs.runner-preparation.outputs.matrix-NVIDIA != ''
+    uses: ./.github/workflows/integration-tests-nvidia.yml
+    with:
+      matrix: ${{ needs.runner-preparation.outputs.matrix-NVIDIA }}
+
+  integration-tests-amd:
+    needs: runner-preparation
+    if: needs.runner-preparation.outputs.matrix-AMD != ''
+    uses: ./.github/workflows/integration-tests-amd.yml
+    with:
+      matrix: ${{ needs.runner-preparation.outputs.matrix-AMD }}
+
+  build-macos:
+    needs: runner-preparation
+    if: needs.runner-preparation.outputs.matrix-MACOS != ''
+    uses: ./.github/workflows/build-macos.yml
+    with:
+      matrix: ${{ needs.runner-preparation.outputs.matrix-MACOS }}
diff --git a/.github/workflows/create_release.yml b/.github/workflows/create_release.yml
@@ -0,0 +1,77 @@
+name: Create Release
+
+on:
+  push:
+    branches:
+      - main
+      - release/*
+    tags:
+      # Final Release tags look like: v1.11.0
+      - v[0-9]+.[0-9]+.[0-9]+
+      # Release candidate tags look like: v1.11.0-rc1
+      - v[0-9]+.[0-9]+.[0-9]+-rc[0-9]+
+  release:
+    types: [published]
+  pull_request:
+    paths: [.github/workflows/create_release.yml]
+
+jobs:
+
+  release:
+    if: ${{ github.repository == 'triton-lang/triton' }}
+    name: Create Release
+    runs-on: ubuntu-latest
+    permissions:
+      contents: write
+    outputs:
+      release_name: "${{ steps.release_name.outputs.name }}"
+    steps:
+      - uses: actions/checkout@v6
+        with:
+          show-progress: false
+          submodules: 'recursive'
+          ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
+      - name: Fake name for PRs
+        if: ${{ github.event_name == 'pull_request' }}
+        run: echo "PT_GITHUB_REF=refs/tags/pr-tag" >> "$GITHUB_ENV"
+      - name: Real name for non-PRs
+        if: ${{ github.event_name != 'pull_request' }}
+        run: echo "PT_GITHUB_REF=$GITHUB_REF" >> "$GITHUB_ENV"
+      - name: Set filenames
+        run: |
+          tag_or_branch="${PT_GITHUB_REF#refs/tags/}"
+          tag_or_branch="${tag_or_branch#refs/heads/}"
+          # replace directory separators with _ in branch name
+          tag_or_branch="${tag_or_branch//\//_}"
+          if [[ ${tag_or_branch} == v* ]]; then
+            # strip leading v from tag name
+            tag_or_branch="${tag_or_branch#v}"
+            # important: version must be fixed in setup.py
+            sed -i -e "s:^TRITON_VERSION = .*:TRITON_VERSION = '${tag_or_branch}':" setup.py || exit 1
+          fi
+          echo "RELEASE_NAME=triton-$tag_or_branch" >> "$GITHUB_ENV"
+      - name: Create source distribution
+        run: |
+          pip install build || exit 1
+          python -m build -s || exit 1
+          cd dist || exit 1
+          release_file=( *.tar.gz )
+          echo "RELEASE_FILE=${release_file}" >> "$GITHUB_ENV"
+      - name: Upload source distribution for release
+        if: ${{ github.event_name == 'release' }}
+        uses: softprops/action-gh-release@v3
+        with:
+          files: dist/${{env.RELEASE_FILE}}
+      - name: Upload source distribution to GHA artifacts for release tags
+        if: ${{ github.event_name == 'push' && startsWith(github.ref, 'refs/tags/v') && contains(github.ref, 'rc') }}
+        uses: actions/upload-artifact@v7
+        with:
+          name: ${{ env.RELEASE_FILE }}
+          path: dist/${{ env.RELEASE_FILE }}
+      - name: Set output  # feeds the job-level `release_name` output via steps.release_name.outputs.name
+        id: release_name
+        run: echo "name=${RELEASE_NAME}.tar.gz" >> "${GITHUB_OUTPUT}"
+
+concurrency:
+  group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.sha }}-${{ github.event_name }}
+  cancel-in-progress: true