Skip to content
Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 3 additions & 3 deletions .github/configs/amd-master.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -688,7 +688,7 @@ gptoss-fp4-mi355x-vllm:
- { tp: 8, conc-start: 4, conc-end: 8 }

gptoss-fp4-mi355x-atom:
- image: rocm/atom:rocm7.1.1-ubuntu24.04-pytorch2.9-atom0.1.1-MI350x
+ image: rocm/atom:rocm7.2.2_ubuntu24.04_py3.12_pytorch_release_2.10.0_atom0.1.2.post
model: openai/gpt-oss-120b
model-prefix: gptoss
runner: mi355x
Expand All @@ -699,12 +699,12 @@ gptoss-fp4-mi355x-atom:
- isl: 1024
osl: 1024
search-space:
- - { tp: 1, conc-start: 16, conc-end: 128 }
+ - { tp: 1, conc-start: 16, conc-end: 256 }
- { tp: 8, ep: 1, conc-start: 4, conc-end: 32 }
- isl: 8192
osl: 1024
search-space:
- - { tp: 1, conc-start: 4, conc-end: 128 }
+ - { tp: 1, conc-start: 4, conc-end: 256 }
- { tp: 8, ep: 1, conc-start: 4, conc-end: 16 }

dsr1-fp8-mi355x-atom:
Expand Down
2 changes: 2 additions & 0 deletions benchmarks/single_node/gptoss_fp4_mi355x_atom.sh
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,7 @@ fi

# Start GPU monitoring (power, temperature, clocks every second)
start_gpu_monitor
MEM_FRAC_STATIC=0.9

set -x

Expand All @@ -54,6 +55,7 @@ python3 -m atom.entrypoints.openai_server \
--server-port $PORT \
-tp $TP \
--kv_cache_dtype fp8 $CALCULATED_MAX_MODEL_LEN $EP \
--gpu-memory-utilization $MEM_FRAC_STATIC \
--block-size $BLOCK_SIZE > $SERVER_LOG 2>&1 &

SERVER_PID=$!
Expand Down
6 changes: 6 additions & 0 deletions perf-changelog.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -1892,3 +1892,9 @@
description:
- "Pass --dsv4 to run_benchmark_serving so MTP benchmarks use the DSv4 chat template (PR #1153)"
pr-link: https://github.com/SemiAnalysisAI/InferenceX/pull/1182

- config-keys:
- gptoss-fp4-mi355x-atom
description:
- "Update GPTOSS-120B FP4 MI355X Atom benchmark (rocm/atom:rocm7.2.2_ubuntu24.04_py3.12_pytorch_release_2.10.0_atom0.1.2.post)"
pr-link: https://github.com/SemiAnalysisAI/InferenceX/pull/1195
Loading