Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
20 commits
Select commit Hold shift + click to select a range
952b218
Change AMD MI355 docker image to lmsysorg/sglang:v0.5.5.post2-rocm700…
rkarhila-amd Nov 19, 2025
15452f7
Merge branch 'main' into update_AMD_dsr1fp8_images
rkarhila-amd Nov 20, 2025
2590adf
Adjust preview for dark mode and light mode (#250)
austenstone Nov 21, 2025
f527687
adding ISL/OSL to collect results table summary (#249)
jgangani Nov 21, 2025
e6d24b2
chore: refactor Docker runner launch to be like SLURM (#227)
cquil11 Nov 21, 2025
1f75582
Bump actions/checkout from 5.0.0 to 6.0.0 in the github-actions group…
dependabot[bot] Nov 24, 2025
ba61398
Add b200 DGXC node to b200 runners list (#245)
Ankur-singh Nov 30, 2025
2635908
Bump actions/setup-python in the github-actions group (#259)
dependabot[bot] Dec 1, 2025
1520664
feat: refresh GB200 SGLang DSR1 submission (#257)
yunzhoul-nv Dec 1, 2025
de4245a
Update GPTOSS B200 TRTLLM (#266)
jgangani Dec 2, 2025
3335bec
Merge branch 'main' into update_AMD_dsr1fp8_images
rkarhila-amd Dec 3, 2025
fdedafb
Fixed community container for MI35x dsr1 for fp8 for real
rkarhila-amd Dec 3, 2025
a3f6a6a
Merge branch 'main' into update_AMD_dsr1fp8_images
cquil11 Dec 3, 2025
a470d47
Merge branch 'main' into update_AMD_dsr1fp8_images
cquil11 Dec 3, 2025
07d3877
Update dsr1_fp4_mi355x_docker.sh with env flags
ppalanga Dec 4, 2025
5bdbd31
Update dsr1_fp4_mi355x_slurm.sh
ppalanga Dec 4, 2025
e0706e6
Merge pull request #1 from rkarhila-amd/update_AMD_dsr1fp4_image
rkarhila-amd Dec 4, 2025
cbd0608
from post2 to post3 for fp8
rkarhila-amd Dec 4, 2025
e55d426
tidy formatting
rkarhila-amd Dec 4, 2025
1a491ac
Merge branch 'main' into update_AMD_dsr1fp8_images
cquil11 Dec 4, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions .github/configs/amd-master.yaml
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
dsr1-fp4-mi355x-sglang:
image: rocm/7.0:rocm7.0_ubuntu_22.04_sgl-dev-v0.5.2-rocm7.0-mi35x-20250915
image: lmsysorg/sglang:v0.5.5.post2-rocm700-mi35x
Comment thread
cquil11 marked this conversation as resolved.
model: amd/DeepSeek-R1-0528-MXFP4-Preview
model-prefix: dsr1
runner: mi355x
Expand Down Expand Up @@ -63,7 +63,7 @@ dsr1-fp8-mi325x-sglang:
- { tp: 8, conc-start: 4, conc-end: 64 }

dsr1-fp8-mi355x-sglang:
image: rocm/7.0:rocm7.0_ubuntu_22.04_sgl-dev-v0.5.2-rocm7.0-mi35x-20250915
image: lmsysorg/sglang:v0.5.5.post3-rocm700-mi35x
model: deepseek-ai/DeepSeek-R1-0528
model-prefix: dsr1
runner: mi355x
Expand Down
1 change: 1 addition & 0 deletions benchmarks/dsr1_fp4_mi355x_docker.sh
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
# RESULT_FILENAME
# NUM_PROMPTS
export SGLANG_USE_AITER=1
export ROCM_QUICK_REDUCE_QUANTIZATION=INT4

PREFILL_SIZE=196608
if [[ "$ISL" == "8192" && "$OSL" == "1024" ]]; then
Expand Down
1 change: 1 addition & 0 deletions benchmarks/dsr1_fp4_mi355x_slurm.sh
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
# RANDOM_RANGE_RATIO
# RESULT_FILENAME
export SGLANG_USE_AITER=1
export ROCM_QUICK_REDUCE_QUANTIZATION=INT4
SERVER_LOG=$(mktemp /tmp/server-XXXXXX.log)

PREFILL_SIZE=196608
Expand Down
5 changes: 5 additions & 0 deletions benchmarks/dsr1_fp8_mi355x_docker.sh
Original file line number Diff line number Diff line change
Expand Up @@ -14,10 +14,14 @@
# https://rocm.docs.amd.com/en/docs-7.0-docker/benchmark-docker/inference-sglang-deepseek-r1-fp8.html

export SGLANG_USE_AITER=1
export RCCL_MSCCL_ENABLE=0
export ROCM_QUICK_REDUCE_QUANTIZATION=INT4


SERVER_LOG=$(mktemp /tmp/server-XXXXXX.log)

python3 -m sglang.launch_server \
--attention-backend aiter \
--model-path $MODEL \
--host=0.0.0.0 \
--port $PORT \
Expand All @@ -27,6 +31,7 @@ python3 -m sglang.launch_server \
--mem-fraction-static 0.8 --disable-radix-cache \
--num-continuous-decode-steps 4 \
--max-prefill-tokens 196608 \
--enable-torch-compile \
--cuda-graph-max-bs 128 > $SERVER_LOG 2>&1 &

SERVER_PID=$!
Expand Down
6 changes: 5 additions & 1 deletion benchmarks/dsr1_fp8_mi355x_slurm.sh
Original file line number Diff line number Diff line change
Expand Up @@ -12,11 +12,14 @@

export HF_MODULES_CACHE="/tmp/hf_modules_cache/"
export SGLANG_USE_AITER=1
export RCCL_MSCCL_ENABLE=0
export ROCM_QUICK_REDUCE_QUANTIZATION=INT4

SERVER_LOG=$(mktemp /tmp/server-XXXXXX.log)

set -x
python3 -m sglang.launch_server \
--attention-backend aiter \
--model-path $MODEL \
--host=0.0.0.0 \
--port $PORT \
Expand All @@ -27,7 +30,8 @@ python3 -m sglang.launch_server \
--disable-radix-cache \
--num-continuous-decode-steps 4 \
--max-prefill-tokens 196608 \
--cuda-graph-max-bs 128 > $SERVER_LOG 2>&1 &
--cuda-graph-max-bs 128 \
--enable-torch-compile > $SERVER_LOG 2>&1 &

SERVER_PID=$!

Expand Down