diff --git a/.github/workflows/pr-test-sgl-kernel.yml b/.github/workflows/pr-test-sgl-kernel.yml index c87f8d548b2..d86e3470162 100644 --- a/.github/workflows/pr-test-sgl-kernel.yml +++ b/.github/workflows/pr-test-sgl-kernel.yml @@ -35,9 +35,14 @@ jobs: runs-on: sgl-kernel-build-node strategy: matrix: - python-version: ['3.9'] - cuda-version: ['12.4'] - + include: + - python-version: '3.9' + cuda-version: '11.8' + - python-version: '3.9' + cuda-version: '12.4' + - python-version: '3.9' + cuda-version: '12.8' + name: Build Wheel (CUDA ${{ matrix.cuda-version }}) steps: - name: Cleanup run: | @@ -52,13 +57,14 @@ jobs: with: python-version: ${{ matrix.python-version }} - - name: Build wheels for Python ${{ matrix.python-version }} and CUDA ${{ matrix.cuda-version }} + - name: Build wheel for Python ${{ matrix.python-version }} and CUDA ${{ matrix.cuda-version }} run: | cd sgl-kernel chmod +x ./build.sh ./build.sh "${{ matrix.python-version }}" "${{ matrix.cuda-version }}" - - name: Upload artifacts + - name: Upload artifacts (only for CUDA 12.4) + if: ${{ matrix.cuda-version == '12.4' }} uses: actions/upload-artifact@v4 with: name: wheel-python${{ matrix.python-version }}-cuda${{ matrix.cuda-version }} @@ -128,7 +134,7 @@ jobs: pip3 uninstall sgl-kernel -y finish: - needs: [unit-test, mla-test, lint] + needs: [unit-test, mla-test, lint, build-wheels] runs-on: ubuntu-latest steps: - name: Check all dependent job statuses diff --git a/.github/workflows/release-whl-kernel.yml b/.github/workflows/release-whl-kernel.yml index 631551475fe..ebfc4b80237 100644 --- a/.github/workflows/release-whl-kernel.yml +++ b/.github/workflows/release-whl-kernel.yml @@ -14,7 +14,7 @@ on: jobs: build-wheels: if: github.repository == 'sgl-project/sglang' - runs-on: ubuntu-latest + runs-on: sgl-kernel-build-node strategy: matrix: python-version: ['3.9'] diff --git a/sgl-kernel/csrc/attention/cutlass_mla_kernel.cu b/sgl-kernel/csrc/attention/cutlass_mla_kernel.cu index da6ea2a08de..46ad440c58e 100644 --- a/sgl-kernel/csrc/attention/cutlass_mla_kernel.cu +++ b/sgl-kernel/csrc/attention/cutlass_mla_kernel.cu @@ -25,6 +25,8 @@ limitations under the License. #include #include +#if defined CUDA_VERSION && CUDA_VERSION >= 12040 + #define CUTLASS_CHECK(status) \ { \ cutlass::Status error = status; \ @@ -205,3 +207,5 @@ int64_t cutlass_mla_get_workspace_size(int64_t max_seq_len, int64_t num_batches, return MlaSm100Type::Fmha::get_workspace_size(arguments); } + +#endif