From 6067dd3313014a33faee7d25ec8c5ddb5ec874cb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?oliver=20k=C3=B6nig?= Date: Fri, 22 Aug 2025 12:14:57 +0000 Subject: [PATCH 1/8] ci: Move build job to workflow template MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: oliver könig --- .github/workflows/_build.yml | 5 +++++ .github/workflows/publish.yml | 1 + 2 files changed, 6 insertions(+) diff --git a/.github/workflows/_build.yml b/.github/workflows/_build.yml index d55c47fd910..bbdf825b9b6 100644 --- a/.github/workflows/_build.yml +++ b/.github/workflows/_build.yml @@ -23,6 +23,11 @@ on: description: "The C++11 ABI to use for the build" required: true type: string + upload-to-release: + description: "Upload wheel to this release" + required: false + type: boolean + default: false release-version: description: "Upload wheel to this release" required: false diff --git a/.github/workflows/publish.yml b/.github/workflows/publish.yml index 0a668e291cb..e8d9c991f42 100644 --- a/.github/workflows/publish.yml +++ b/.github/workflows/publish.yml @@ -61,6 +61,7 @@ jobs: cuda-version: ${{ matrix.cuda-version }} torch-version: ${{ matrix.torch-version }} cxx11_abi: ${{ matrix.cxx11_abi }} + upload-to-release: true release-version: ${{ needs.setup_release.outputs.release-version }} publish_package: From 1e4c398fa6c54054ddfa2369e6568e0147f35334 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?oliver=20k=C3=B6nig?= Date: Fri, 22 Aug 2025 12:18:09 +0000 Subject: [PATCH 2/8] check out right tag MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: oliver könig --- .github/workflows/_build.yml | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/.github/workflows/_build.yml b/.github/workflows/_build.yml index bbdf825b9b6..f21cdfa6731 100644 --- a/.github/workflows/_build.yml +++ b/.github/workflows/_build.yml @@ -42,8 +42,10 @@ jobs: runs-on: ${{ inputs.runs-on }} name: Build wheel (${{ inputs.release-version }}-${{ inputs.python-version }}-${{ inputs.cuda-version }}-${{ inputs.torch-version }}-${{ inputs.cxx11_abi }}) steps: - - name: Checkout - uses: actions/checkout@v4 + - name: Set up Python + uses: actions/setup-python@v5 + with: + python-version: ${{ inputs.python-version }} - name: Set up Python uses: actions/setup-python@v5 From cb3faa604a2221480c516181f1b2a5f5db223b79 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?oliver=20k=C3=B6nig?= Date: Fri, 22 Aug 2025 12:21:03 +0000 Subject: [PATCH 3/8] fix MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: oliver könig --- .github/workflows/_build.yml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/workflows/_build.yml b/.github/workflows/_build.yml index f21cdfa6731..0e047699a9c 100644 --- a/.github/workflows/_build.yml +++ b/.github/workflows/_build.yml @@ -42,10 +42,10 @@ jobs: runs-on: ${{ inputs.runs-on }} name: Build wheel (${{ inputs.release-version }}-${{ inputs.python-version }}-${{ inputs.cuda-version }}-${{ inputs.torch-version }}-${{ inputs.cxx11_abi }}) steps: - - name: Set up Python - uses: actions/setup-python@v5 + - name: Checkout + uses: actions/checkout@v4 with: - python-version: ${{ inputs.python-version }} + ref: ${{ inputs.release-version }} - name: Set up Python uses: actions/setup-python@v5 From d1a4caadf4565e061f1a9d1805fc83cbdd5e5a17 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?oliver=20k=C3=B6nig?= Date: Fri, 22 Aug 2025 12:24:01 +0000 Subject: [PATCH 4/8] fix MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: oliver könig --- .github/workflows/_build.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/_build.yml b/.github/workflows/_build.yml index 0e047699a9c..e6a8cb664f0 100644 --- a/.github/workflows/_build.yml +++ b/.github/workflows/_build.yml @@ -149,6 +149,7 @@ jobs: - name: Upload Release Asset id: upload_release_asset + if: inputs.upload-to-release uses: actions/upload-release-asset@v1 env: GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} From 9512812ef98aec5545a3064333808da5828448b2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?oliver=20k=C3=B6nig?= Date: Fri, 22 Aug 2025 12:24:38 +0000 Subject: [PATCH 5/8] fix MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: oliver könig --- .github/workflows/_build.yml | 6 ------ .github/workflows/publish.yml | 1 - 2 files changed, 7 deletions(-) diff --git a/.github/workflows/_build.yml b/.github/workflows/_build.yml index e6a8cb664f0..52a47b4b434 100644 --- a/.github/workflows/_build.yml +++ b/.github/workflows/_build.yml @@ -23,11 +23,6 @@ on: description: "The C++11 ABI to use for the build" required: true type: string - upload-to-release: - description: "Upload wheel to this release" - required: false - type: boolean - default: false release-version: description: "Upload wheel to this release" required: false @@ -149,7 +144,6 @@ jobs: - name: Upload Release Asset id: upload_release_asset - if: inputs.upload-to-release uses: actions/upload-release-asset@v1 env: GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} diff --git a/.github/workflows/publish.yml b/.github/workflows/publish.yml index e8d9c991f42..0a668e291cb 100644 --- a/.github/workflows/publish.yml +++ b/.github/workflows/publish.yml @@ -61,7 +61,6 @@ jobs: cuda-version: ${{ matrix.cuda-version }} torch-version: ${{ matrix.torch-version }} cxx11_abi: ${{ matrix.cxx11_abi }} - upload-to-release: true release-version: ${{ needs.setup_release.outputs.release-version }} publish_package: From 70e6e1d2f0649be857df5e938c37e30beea97a77 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?oliver=20k=C3=B6nig?= Date: Fri, 22 Aug 2025 12:34:00 +0000 Subject: [PATCH 6/8] revert MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: oliver könig --- .github/workflows/_build.yml | 2 -- 1 file changed, 2 deletions(-) diff --git a/.github/workflows/_build.yml b/.github/workflows/_build.yml index 52a47b4b434..d55c47fd910 100644 --- a/.github/workflows/_build.yml +++ b/.github/workflows/_build.yml @@ -39,8 +39,6 @@ jobs: steps: - name: Checkout uses: actions/checkout@v4 - with: - ref: ${{ inputs.release-version }} - name: Set up Python uses: actions/setup-python@v5 From 26ee82fcc933491f4fa2f52aab9c4d8bbbe6a36a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?oliver=20k=C3=B6nig?= Date: Wed, 27 Aug 2025 04:57:43 +0200 Subject: [PATCH 7/8] ci: Allow build/deploy of arbitrary configurations (#1827) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * ci: Allow build/deploy of arbitrary configurations Signed-off-by: oliver könig * add Signed-off-by: oliver könig * cleanui Signed-off-by: oliver könig * cxx11_abi Signed-off-by: oliver könig * fix Signed-off-by: oliver könig * fix Signed-off-by: oliver könig * test Signed-off-by: oliver könig * fix Signed-off-by: oliver könig * fix Signed-off-by: oliver könig * final Signed-off-by: oliver könig --------- Signed-off-by: oliver könig --- .github/workflows/_build.yml | 76 +++++++++++++++++++++++++++++++++--- .github/workflows/build.yml | 47 ++++++++++++++++++++++ 2 files changed, 117 insertions(+), 6 deletions(-) create mode 100644 .github/workflows/build.yml diff --git a/.github/workflows/_build.yml b/.github/workflows/_build.yml index d55c47fd910..47d7bb49055 100644 --- a/.github/workflows/_build.yml +++ b/.github/workflows/_build.yml @@ -23,6 +23,11 @@ on: description: "The C++11 ABI to use for the build" required: true type: string + upload-to-release: + description: "Upload wheel to this release" + required: false + type: boolean + default: false release-version: description: "Upload wheel to this release" required: false @@ -39,6 +44,9 @@ jobs: steps: - name: Checkout uses: actions/checkout@v4 + with: + ref: ${{ inputs.release-version }} + submodules: recursive - name: Set up Python uses: actions/setup-python@v5 @@ -109,9 +117,34 @@ jobs: python -c "import torch; print('PyTorch:', torch.__version__)" python -c "import torch; print('CUDA:', torch.version.cuda)" python -c "from torch.utils import cpp_extension; print (cpp_extension.CUDA_HOME)" - shell: bash + + - name: Restore build cache + uses: actions/cache/restore@v4 + with: + path: build.tar + key: build-${{ inputs.release-version }}-${{ inputs.python-version }}-${{ inputs.cuda-version }}-${{ inputs.torch-version }}-${{ inputs.cxx11_abi }}-${{ github.run_number }}-${{ github.run_attempt }} + restore-keys: | + build-${{ inputs.release-version }}-${{ inputs.python-version }}-${{ inputs.cuda-version }}-${{ inputs.torch-version }}-${{ inputs.cxx11_abi }}- + + - name: Unpack build cache + run: | + echo ::group::Adjust timestamps + sudo find / -exec touch -t 197001010000 {} + || true + echo ::endgroup:: + + if [ -f build.tar ]; then + find . -mindepth 1 -maxdepth 1 ! -name 'build.tar' -exec rm -rf {} + + tar -xpvf build.tar -C . + else + echo "No build.tar found, skipping" + fi + + ls -al ./ + ls -al build/ || true + ls -al csrc/ || true - name: Build wheel + id: build_wheel run: | # We want setuptools >= 49.6.0 otherwise we can't compile the extension if system CUDA version is 11.7 and pytorch cuda version is 11.6 # https://github.com/pytorch/pytorch/blob/664058fa83f1d8eede5d66418abff6e20bd76ca8/torch/utils/cpp_extension.py#L810 @@ -122,11 +155,41 @@ jobs: export LD_LIBRARY_PATH=/usr/local/nvidia/lib64:/usr/local/cuda/lib64:$LD_LIBRARY_PATH # Limit MAX_JOBS otherwise the github runner goes OOM # nvcc 11.8 can compile with 2 jobs, but nvcc 12.3 goes OOM - MAX_JOBS=$([ "$MATRIX_CUDA_VERSION" == "129" ] && echo 1 || echo 2) NVCC_THREADS=2 FLASH_ATTENTION_FORCE_BUILD="TRUE" FLASH_ATTENTION_FORCE_CXX11_ABI=${{ inputs.cxx11_abi}} python setup.py bdist_wheel --dist-dir=dist - tmpname=cu${WHEEL_CUDA_VERSION}torch${MATRIX_TORCH_VERSION}cxx11abi${{ inputs.cxx11_abi }} - wheel_name=$(ls dist/*whl | xargs -n 1 basename | sed "s/-/+$tmpname-/2") - ls dist/*whl |xargs -I {} mv {} dist/${wheel_name} - echo "wheel_name=${wheel_name}" >> $GITHUB_ENV + + export MAX_JOBS=$([ "$MATRIX_CUDA_VERSION" == "129" ] && echo 1 || echo 2) + export NVCC_THREADS=2 + export FLASH_ATTENTION_FORCE_BUILD="TRUE" + export FLASH_ATTENTION_FORCE_CXX11_ABI=${{ inputs.cxx11_abi }} + + # 5h timeout since GH allows max 6h and we want some buffer + EXIT_CODE=0 + timeout 5h python setup.py bdist_wheel --dist-dir=dist || EXIT_CODE=$? + + if [ $EXIT_CODE -eq 0 ]; then + tmpname=cu${WHEEL_CUDA_VERSION}torch${MATRIX_TORCH_VERSION}cxx11abi${{ inputs.cxx11_abi }} + wheel_name=$(ls dist/*whl | xargs -n 1 basename | sed "s/-/+$tmpname-/2") + ls dist/*whl |xargs -I {} mv {} dist/${wheel_name} + echo "wheel_name=${wheel_name}" >> $GITHUB_ENV + fi + + # Store exit code in GitHub env for later steps + echo "build_exit_code=$EXIT_CODE" | tee -a "$GITHUB_OUTPUT" + + # Do not fail the job if timeout killed the build + exit $EXIT_CODE + + - name: Log build logs after timeout + if: always() && steps.build_wheel.outputs.build_exit_code == 124 + run: | + ls -al ./ + tar -cvf build.tar . --atime-preserve=replace + + - name: Save build cache timeout + if: always() && steps.build_wheel.outputs.build_exit_code == 124 + uses: actions/cache/save@v4 + with: + key: build-${{ inputs.release-version }}-${{ inputs.python-version }}-${{ inputs.cuda-version }}-${{ inputs.torch-version }}-${{ inputs.cxx11_abi }}-${{ github.run_number }}-${{ github.run_attempt }} + path: build.tar - name: Log Built Wheels run: | @@ -142,6 +205,7 @@ jobs: - name: Upload Release Asset id: upload_release_asset + if: inputs.upload-to-release uses: actions/upload-release-asset@v1 env: GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml new file mode 100644 index 00000000000..9a454b3fcde --- /dev/null +++ b/.github/workflows/build.yml @@ -0,0 +1,47 @@ +name: Build wheels + +on: + workflow_call: + inputs: + runs-on: + description: "The runner to use for the build" + required: true + type: string + default: ubuntu-22.04 + python-version: + description: "The Python version to use for the build" + required: true + type: string + cuda-version: + description: "The CUDA version to use for the build" + required: true + type: string + torch-version: + description: "The PyTorch version to use for the build" + required: true + type: string + cxx11_abi: + description: "Enable torch flag C++11 ABI (TRUE/FALSE)" + required: true + type: string + upload-to-release: + description: "Upload wheel to this release" + required: false + type: boolean + default: false + release-version: + description: "Upload wheel to this release" + required: false + type: string + +jobs: + build-wheels: + uses: ./.github/workflows/_build.yml + with: + runs-on: ${{ inputs.runs-on }} + python-version: ${{ inputs.python-version }} + cuda-version: ${{ inputs.cuda-version }} + torch-version: ${{ inputs.torch-version }} + cxx11_abi: ${{ inputs.cxx11_abi }} + upload-to-release: ${{ inputs.upload-to-release }} + release-version: ${{ inputs.release-version }} From 98e16f753b338e5f18769f373efcf0321869a030 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?oliver=20k=C3=B6nig?= Date: Wed, 27 Aug 2025 08:45:53 +0000 Subject: [PATCH 8/8] upload MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: oliver könig --- .github/workflows/publish.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/publish.yml b/.github/workflows/publish.yml index 0a668e291cb..d11b703ef99 100644 --- a/.github/workflows/publish.yml +++ b/.github/workflows/publish.yml @@ -62,6 +62,7 @@ jobs: torch-version: ${{ matrix.torch-version }} cxx11_abi: ${{ matrix.cxx11_abi }} release-version: ${{ needs.setup_release.outputs.release-version }} + upload-to-release: true publish_package: name: Publish package