From 3f98628545fa2a247c9246f2b32d16ec7c94e5b2 Mon Sep 17 00:00:00 2001 From: zhyncs Date: Sat, 12 Apr 2025 11:47:45 -0700 Subject: [PATCH 1/6] upd --- .github/workflows/pr-test-sgl-kernel.yml | 57 ++++++++++++++++++++++++ 1 file changed, 57 insertions(+) diff --git a/.github/workflows/pr-test-sgl-kernel.yml b/.github/workflows/pr-test-sgl-kernel.yml index c87f8d548b2..b9bf9569b95 100644 --- a/.github/workflows/pr-test-sgl-kernel.yml +++ b/.github/workflows/pr-test-sgl-kernel.yml @@ -64,6 +64,63 @@ jobs: name: wheel-python${{ matrix.python-version }}-cuda${{ matrix.cuda-version }} path: sgl-kernel/dist/* + build-wheel-cu118: + if: github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request' + runs-on: ubuntu-latest + strategy: + matrix: + python-version: ['3.9'] + cuda-version: ['11.8'] + + steps: + - name: Cleanup + run: | + sudo rm -rf $GITHUB_WORKSPACE/* || true + + - uses: actions/checkout@v4 + with: + submodules: 'recursive' + + - name: Set up Python ${{ matrix.python-version }} + uses: actions/setup-python@v5 + with: + python-version: ${{ matrix.python-version }} + + - name: Build wheel for Python ${{ matrix.python-version }} and CUDA ${{ matrix.cuda-version }} + run: | + cd sgl-kernel + chmod +x ./build.sh + ./build.sh "${{ matrix.python-version }}" "${{ matrix.cuda-version }}" + + build-wheel-cu128: + if: github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request' + runs-on: sgl-kernel-build-node + strategy: + matrix: + python-version: ['3.9'] + cuda-version: ['12.8'] + + steps: + - name: Cleanup + run: | + sudo rm -rf $GITHUB_WORKSPACE/* || true + + - uses: actions/checkout@v4 + with: + submodules: 'recursive' + + - name: Set up Python ${{ matrix.python-version }} + uses: actions/setup-python@v5 + with: + python-version: ${{ matrix.python-version }} + + - name: Build wheel for Python ${{ matrix.python-version }} and CUDA ${{ matrix.cuda-version }} + run: | + cd sgl-kernel + chmod +x ./build.sh + ./build.sh "${{ matrix.python-version }}" "${{ matrix.cuda-version }}" + + unit-test: if: github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request' needs: build-wheels From 4b81e0f93f89059352e10adf4b86c05c600ac397 Mon Sep 17 00:00:00 2001 From: zhyncs Date: Sat, 12 Apr 2025 11:58:33 -0700 Subject: [PATCH 2/6] upd --- .github/workflows/pr-test-sgl-kernel.yml | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/.github/workflows/pr-test-sgl-kernel.yml b/.github/workflows/pr-test-sgl-kernel.yml index b9bf9569b95..a612ce99693 100644 --- a/.github/workflows/pr-test-sgl-kernel.yml +++ b/.github/workflows/pr-test-sgl-kernel.yml @@ -30,7 +30,7 @@ jobs: clangFormatVersion: 18 style: file - build-wheels: + build-wheel-cu124: if: github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request' runs-on: sgl-kernel-build-node strategy: @@ -123,7 +123,7 @@ jobs: unit-test: if: github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request' - needs: build-wheels + needs: build-wheel-cu124 runs-on: 1-gpu-runner steps: - uses: actions/checkout@v4 @@ -155,7 +155,7 @@ jobs: mla-test: if: github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request' - needs: build-wheels + needs: build-wheel-cu124 runs-on: 1-gpu-runner steps: - uses: actions/checkout@v4 @@ -185,7 +185,7 @@ jobs: pip3 uninstall sgl-kernel -y finish: - needs: [unit-test, mla-test, lint] + needs: [unit-test, mla-test, lint, build-wheel-cu118, build-wheel-cu128] runs-on: ubuntu-latest steps: - name: Check all dependent job statuses From 8242a1f6b3b14e1bd02c16bec50153e9ca8c7126 Mon Sep 17 00:00:00 2001 From: zhyncs Date: Sat, 12 Apr 2025 12:13:43 -0700 Subject: [PATCH 3/6] upd --- sgl-kernel/csrc/attention/cutlass_mla_kernel.cu | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/sgl-kernel/csrc/attention/cutlass_mla_kernel.cu b/sgl-kernel/csrc/attention/cutlass_mla_kernel.cu index da6ea2a08de..46ad440c58e 100644 --- a/sgl-kernel/csrc/attention/cutlass_mla_kernel.cu +++ b/sgl-kernel/csrc/attention/cutlass_mla_kernel.cu @@ -25,6 +25,8 @@ limitations under the License. #include #include +#if defined CUDA_VERSION && CUDA_VERSION >= 12040 + #define CUTLASS_CHECK(status) \ { \ cutlass::Status error = status; \ @@ -205,3 +207,5 @@ int64_t cutlass_mla_get_workspace_size(int64_t max_seq_len, int64_t num_batches, return MlaSm100Type::Fmha::get_workspace_size(arguments); } + +#endif From f60ae7f672e4595aeda144f4ea4825dcc5180055 Mon Sep 17 00:00:00 2001 From: zhyncs Date: Sat, 12 Apr 2025 12:19:14 -0700 Subject: [PATCH 4/6] upd --- .github/workflows/pr-test-sgl-kernel.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/pr-test-sgl-kernel.yml b/.github/workflows/pr-test-sgl-kernel.yml index a612ce99693..6fe6be20b98 100644 --- a/.github/workflows/pr-test-sgl-kernel.yml +++ b/.github/workflows/pr-test-sgl-kernel.yml @@ -66,7 +66,7 @@ jobs: build-wheel-cu118: if: github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request' - runs-on: ubuntu-latest + runs-on: sgl-kernel-build-node strategy: matrix: python-version: ['3.9'] From 1425aadd369049db47b7d825f8c85395e3f868c0 Mon Sep 17 00:00:00 2001 From: zhyncs Date: Sat, 12 Apr 2025 12:28:39 -0700 Subject: [PATCH 5/6] upd --- .github/workflows/pr-test-sgl-kernel.yml | 81 +++++------------------- 1 file changed, 15 insertions(+), 66 deletions(-) diff --git a/.github/workflows/pr-test-sgl-kernel.yml b/.github/workflows/pr-test-sgl-kernel.yml index 6fe6be20b98..d86e3470162 100644 --- a/.github/workflows/pr-test-sgl-kernel.yml +++ b/.github/workflows/pr-test-sgl-kernel.yml @@ -30,14 +30,19 @@ jobs: clangFormatVersion: 18 style: file - build-wheel-cu124: + build-wheels: if: github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request' runs-on: sgl-kernel-build-node strategy: matrix: - python-version: ['3.9'] - cuda-version: ['12.4'] - + include: + - python-version: '3.9' + cuda-version: '11.8' + - python-version: '3.9' + cuda-version: '12.4' + - python-version: '3.9' + cuda-version: '12.8' + name: Build Wheel (CUDA ${{ matrix.cuda-version }}) steps: - name: Cleanup run: | @@ -52,78 +57,22 @@ jobs: with: python-version: ${{ matrix.python-version }} - - name: Build wheels for Python ${{ matrix.python-version }} and CUDA ${{ matrix.cuda-version }} + - name: Build wheel for Python ${{ matrix.python-version }} and CUDA ${{ matrix.cuda-version }} run: | cd sgl-kernel chmod +x ./build.sh ./build.sh "${{ matrix.python-version }}" "${{ matrix.cuda-version }}" - - name: Upload artifacts + - name: Upload artifacts (only for CUDA 12.4) + if: ${{ matrix.cuda-version == '12.4' }} uses: actions/upload-artifact@v4 with: name: wheel-python${{ matrix.python-version }}-cuda${{ matrix.cuda-version }} path: sgl-kernel/dist/* - build-wheel-cu118: - if: github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request' - runs-on: sgl-kernel-build-node - strategy: - matrix: - python-version: ['3.9'] - cuda-version: ['11.8'] - - steps: - - name: Cleanup - run: | - sudo rm -rf $GITHUB_WORKSPACE/* || true - - - uses: actions/checkout@v4 - with: - submodules: 'recursive' - - - name: Set up Python ${{ matrix.python-version }} - uses: actions/setup-python@v5 - with: - python-version: ${{ matrix.python-version }} - - - name: Build wheel for Python ${{ matrix.python-version }} and CUDA ${{ matrix.cuda-version }} - run: | - cd sgl-kernel - chmod +x ./build.sh - ./build.sh "${{ matrix.python-version }}" "${{ matrix.cuda-version }}" - - build-wheel-cu128: - if: github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request' - runs-on: sgl-kernel-build-node - strategy: - matrix: - python-version: ['3.9'] - cuda-version: ['12.8'] - - steps: - - name: Cleanup - run: | - sudo rm -rf $GITHUB_WORKSPACE/* || true - - - uses: actions/checkout@v4 - with: - submodules: 'recursive' - - - name: Set up Python ${{ matrix.python-version }} - uses: actions/setup-python@v5 - with: - python-version: ${{ matrix.python-version }} - - - name: Build wheel for Python ${{ matrix.python-version }} and CUDA ${{ matrix.cuda-version }} - run: | - cd sgl-kernel - chmod +x ./build.sh - ./build.sh "${{ matrix.python-version }}" "${{ matrix.cuda-version }}" - - unit-test: if: github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request' - needs: build-wheel-cu124 + needs: build-wheels runs-on: 1-gpu-runner steps: - uses: actions/checkout@v4 @@ -155,7 +104,7 @@ jobs: mla-test: if: github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request' - needs: build-wheel-cu124 + needs: build-wheels runs-on: 1-gpu-runner steps: - uses: actions/checkout@v4 @@ -185,7 +134,7 @@ jobs: pip3 uninstall sgl-kernel -y finish: - needs: [unit-test, mla-test, lint, build-wheel-cu118, build-wheel-cu128] + needs: [unit-test, mla-test, lint, build-wheels] runs-on: ubuntu-latest steps: - name: Check all dependent job statuses From 65cee938dc17ef7422a223eaa9cf4f5d966c206e Mon Sep 17 00:00:00 2001 From: zhyncs Date: Sat, 12 Apr 2025 12:41:30 -0700 Subject: [PATCH 6/6] upd --- .github/workflows/release-whl-kernel.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/release-whl-kernel.yml b/.github/workflows/release-whl-kernel.yml index 631551475fe..ebfc4b80237 100644 --- a/.github/workflows/release-whl-kernel.yml +++ b/.github/workflows/release-whl-kernel.yml @@ -14,7 +14,7 @@ on: jobs: build-wheels: if: github.repository == 'sgl-project/sglang' - runs-on: ubuntu-latest + runs-on: sgl-kernel-build-node strategy: matrix: python-version: ['3.9']