Skip to content
Merged
Show file tree
Hide file tree
Changes from 8 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 4 additions & 1 deletion .github/configs/amd-master.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -360,7 +360,7 @@ kimik2.5-fp4-mi355x-vllm:
- { tp: 8, conc-start: 4, conc-end: 64 }

minimaxm2.5-fp8-mi355x-vllm:
image: vllm/vllm-openai-rocm:v0.15.1
image: vllm/vllm-openai-rocm:v0.18.0
model: MiniMaxAI/MiniMax-M2.5
model-prefix: minimaxm2.5
runner: mi355x
Expand All @@ -373,16 +373,19 @@ minimaxm2.5-fp8-mi355x-vllm:
search-space:
- { tp: 2, conc-start: 4, conc-end: 64 }
- { tp: 4, conc-start: 4, conc-end: 64 }
- { tp: 8, ep: 8, conc-start: 32, conc-end: 256 }
- isl: 1024
osl: 8192
search-space:
- { tp: 2, conc-start: 4, conc-end: 64 }
- { tp: 4, conc-start: 4, conc-end: 64 }
- { tp: 8, ep: 8, conc-start: 32, conc-end: 256 }
- isl: 8192
osl: 1024
search-space:
- { tp: 2, conc-start: 4, conc-end: 64 }
- { tp: 4, conc-start: 4, conc-end: 64 }
- { tp: 8, ep: 8, conc-start: 32, conc-end: 256 }

minimaxm2.5-fp8-mi300x-vllm:
image: vllm/vllm-openai-rocm:v0.16.0
Expand Down
9 changes: 8 additions & 1 deletion benchmarks/single_node/minimaxm2.5_fp8_mi355x.sh
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ source "$(dirname "$0")/../benchmark_lib.sh"
check_env_vars \
MODEL \
TP \
EP_SIZE \
CONC \
ISL \
OSL \
Expand All @@ -28,16 +29,22 @@ export VLLM_ROCM_USE_AITER=1
SERVER_LOG=/workspace/server.log
PORT=${PORT:-8888}

# Default to no extra flag; switch on expert parallelism only when more than
# one expert-parallel rank is requested.
EP=" "
if [ "$EP_SIZE" -gt 1 ]; then
    EP=" --enable-expert-parallel"
fi

# Start GPU monitoring (power, temperature, clocks every second)
start_gpu_monitor

# Echo each command as it is executed from this point on.
set -x
# Launch the vLLM server in the background, sending both stdout and stderr to
# $SERVER_LOG. $EP is deliberately left unquoted: it word-splits to either no
# argument at all or the single --enable-expert-parallel flag chosen above.
vllm serve $MODEL --port $PORT \
--tensor-parallel-size=$TP \
$EP \
--gpu-memory-utilization 0.95 \
--max-model-len $MAX_MODEL_LEN \
--block-size=32 \
--disable-log-requests \
--trust-remote-code > $SERVER_LOG 2>&1 &

# Capture the PID of the server process that was just backgrounded.
SERVER_PID=$!
Expand Down
8 changes: 8 additions & 0 deletions perf-changelog.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -994,3 +994,11 @@
- "EAGLE speculative decoding: num-steps 3, draft-tokens 4, topk 1"
- "New script: benchmarks/single_node/qwen3.5_fp8_b200_mtp.sh"
pr-link: https://github.com/SemiAnalysisAI/InferenceX/pull/898

# MiniMax-M2.5 FP8 on MI355X (vLLM): TP=8 + EP sweep added and image bumped.
- config-keys:
    - minimaxm2.5-fp8-mi355x-vllm
  description:
    - "ADD minimax TP=8 with EP, in config of 1k1k, 1k8k, and 8k1k sequence lengths"
    - "Config concurrency: 32-256"
    - "update image to vllm/vllm-openai-rocm:v0.18.0"
  pr-link: https://github.com/SemiAnalysisAI/InferenceX/pull/927
Comment thread
benenzhu marked this conversation as resolved.