Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
789 changes: 789 additions & 0 deletions .github/workflows/nightly-test-amd-rocm720.yml

Large diffs are not rendered by default.

944 changes: 944 additions & 0 deletions .github/workflows/pr-test-amd-rocm720.yml

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion .github/workflows/pr-test-amd.yml
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ on:
- ".github/workflows/pr-test-amd.yml"
- "docker/rocm.Dockerfile"
pull_request:
branches: [ main ]
branches: [ dont-trigger-this-one-anyway ]
paths:
- "python/**"
- "scripts/ci/**"
Expand Down
83 changes: 83 additions & 0 deletions .github/workflows/release-docker-amd-rocm720-preview.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,83 @@
# Builds and publishes the ROCm 7.2.0 preview Docker images for AMD GPUs.
name: Release Docker Images ROCm 7.2.0 Preview (AMD)

on:
  # Pull requests against main that touch the ROCm 7.2.0 Dockerfile or this
  # workflow file itself.
  pull_request:
    branches: [main]
    paths:
      - "docker/rocm720.Dockerfile"
      - ".github/workflows/release-docker-amd-rocm720-preview.yml"

  # Pushes of version-style tags (a "v" followed by digits).
  push:
    tags:
      - 'v[0-9]+.*'

  # Manual runs; the caller has to name the version.
  workflow_dispatch:
    inputs:
      version:
        description: 'Version to build (without v prefix, e.g., 0.5.7)'
        required: true

concurrency:
  # Runs are grouped by workflow name plus the pull-request number, or plus
  # the commit SHA when the event carries no PR number. A newer run in the
  # same group cancels the queued or in-progress one; prefixing the workflow
  # name keeps groups of different workflows from colliding.
  group: ${{ github.workflow }}-${{ github.event.number || github.sha }}
  cancel-in-progress: true

jobs:
  # Builds one preview image per GPU architecture and pushes it to Docker Hub
  # as rocm/sgl-dev:v<version>-<rocm_tag>-<YYYYMMDD>-preview.
  publish:
    # Only run in the upstream repository, never in forks.
    if: github.repository == 'sgl-project/sglang'
    runs-on: amd-docker-scale
    environment: 'prod'
    strategy:
      # Let the other architecture finish even if one build fails.
      fail-fast: false
      matrix:
        gpu_arch: ['gfx942-rocm720', 'gfx950-rocm720']
        build_type: ['all']
    steps:
      - name: Checkout repository
        uses: actions/checkout@v4
        with:
          fetch-depth: 0 # Required for git describe to find tags

      - name: "Set Date"
        run: |
          # Exported through GITHUB_ENV so later steps see it as $DATE.
          echo "DATE=$(date +%Y%m%d)" >> "$GITHUB_ENV"

      - name: Resolve version
        id: version
        env:
          # Empty unless the run was started via workflow_dispatch. Passed
          # through env so the free-form input is never spliced into the script.
          INPUT_VERSION: ${{ inputs.version }}
        run: |
          if [ -n "${INPUT_VERSION}" ]; then
            # Manual dispatch: honor the requested version. A leading "v" is
            # tolerated even though the input asks for the bare number.
            # NOTE(review): the checkout above still builds the dispatched
            # ref; confirm whether tag v<version> should be checked out too.
            VERSION="${INPUT_VERSION#v}"
          else
            # Get the latest version tag sorted by version number (e.g., v0.5.7 -> 0.5.7)
            VERSION=$(git tag -l 'v[0-9]*' --sort=-v:refname | head -1 | sed 's/^v//')
          fi

          if [ -z "$VERSION" ]; then
            echo "::error::Could not determine version from git tags"
            exit 1
          fi

          echo "version=${VERSION}" >> "$GITHUB_OUTPUT"
          echo "Detected version: ${VERSION}"

      - name: Login to Docker Hub
        uses: docker/login-action@v2
        with:
          username: ${{ secrets.DOCKERHUB_AMD_USERNAME }}
          password: ${{ secrets.DOCKERHUB_AMD_TOKEN }}

      - name: Build and Push
        env:
          VERSION: ${{ steps.version.outputs.version }}
          GPU_ARCH: ${{ matrix.gpu_arch }}
          BUILD_TYPE: ${{ matrix.build_type }}
        run: |
          echo "Version: ${VERSION}"

          # Map the matrix architecture onto the tag suffix used for the image.
          case "${GPU_ARCH}" in
            gfx942-rocm720) rocm_tag="rocm720-mi30x" ;;
            gfx950-rocm720) rocm_tag="rocm720-mi35x" ;;
            *)
              echo "Unsupported gfx arch"
              exit 1
              ;;
          esac

          # DATE comes from the "Set Date" step via GITHUB_ENV.
          image="rocm/sgl-dev:v${VERSION}-${rocm_tag}-${DATE}-preview"

          docker build . -f docker/rocm720.Dockerfile \
            --build-arg BUILD_TYPE="${BUILD_TYPE}" \
            --build-arg GPU_ARCH="${GPU_ARCH}" \
            -t "${image}" --no-cache
          docker push "${image}"
48 changes: 48 additions & 0 deletions docker/aiter.patch
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
diff --git a/csrc/py_itfs_cu/asm_mla.cu b/csrc/py_itfs_cu/asm_mla.cu
index 995364105..0adab889e 100644
--- a/csrc/py_itfs_cu/asm_mla.cu
+++ b/csrc/py_itfs_cu/asm_mla.cu
@@ -283,14 +283,14 @@ void mla_decode_stage1_asm_fwd(
else if(max_seqlen_q <= 4)
{
// assert(false);
- //sub_Q = 128;
- //static AiterAsmKernel impl_fp8(
- // "_ZN5aiter36mla_a8w8_qh16_qseqlen4_gqaratio16_psE",
- // "/mla/mla_a8w8_qh16_qseqlen4_gqaratio16_ps.co");
- sub_Q = 64;
+ sub_Q = 128;
static AiterAsmKernel impl_fp8(
- "_ZN5aiter36mla_a8w8_qh64_qseqlen4_gqaratio16_psE",
- "/mla/mla_a8w8_qh64_qseqlen4_gqaratio16_ps.co");
+ "_ZN5aiter36mla_a8w8_qh16_qseqlen4_gqaratio16_psE",
+ "/mla/mla_a8w8_qh16_qseqlen4_gqaratio16_ps.co");
+ //sub_Q = 64;
+ //static AiterAsmKernel impl_fp8(
+ // "_ZN5aiter36mla_a8w8_qh64_qseqlen4_gqaratio16_psE",
+ // "/mla/mla_a8w8_qh64_qseqlen4_gqaratio16_ps.co");
impl_ptr = &impl_fp8;
}
else
@@ -319,14 +319,14 @@ void mla_decode_stage1_asm_fwd(
else if(max_seqlen_q <= 4)
{
// assert(false);
- //sub_Q = 128;
- //static AiterAsmKernel impl_fp8(
- // "_ZN5aiter33mla_a8w8_qh16_qseqlen4_gqaratio16E",
- // "/mla/mla_a8w8_qh16_qseqlen4_gqaratio16.co");
- sub_Q = 64;
+ sub_Q = 128;
static AiterAsmKernel impl_fp8(
- "_ZN5aiter33mla_a8w8_qh64_qseqlen4_gqaratio16E",
- "/mla/mla_a8w8_qh64_qseqlen4_gqaratio16.co");
+ "_ZN5aiter33mla_a8w8_qh16_qseqlen4_gqaratio16E",
+ "/mla/mla_a8w8_qh16_qseqlen4_gqaratio16.co");
+ //sub_Q = 64;
+ //static AiterAsmKernel impl_fp8(
+ // "_ZN5aiter33mla_a8w8_qh64_qseqlen4_gqaratio16E",
+ // "/mla/mla_a8w8_qh64_qseqlen4_gqaratio16.co");
impl_ptr = &impl_fp8;
}
else
Loading
Loading