Skip to content
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion benchmarks/single_node/glm5_fp8_mi355x.sh
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,8 @@ python3 -m sglang.launch_server \
--mem-fraction-static 0.85 \
--model-loader-extra-config '{"enable_multithread_load": true, "num_threads": 8}' \
--nsa-prefill-backend tilelang \
--nsa-decode-backend tilelang $EVAL_CONTEXT_ARGS > $SERVER_LOG 2>&1 &
--nsa-decode-backend tilelang \
--disable-radix-cache $EVAL_CONTEXT_ARGS > $SERVER_LOG 2>&1 &

SERVER_PID=$!

Expand Down
3 changes: 2 additions & 1 deletion benchmarks/single_node/qwen3.5_bf16_mi300x.sh
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,8 @@ python3 -m sglang.launch_server \
--port $PORT \
--tensor-parallel-size $TP \
--trust-remote-code \
--mem-fraction-static 0.8 $EVAL_CONTEXT_ARGS > $SERVER_LOG 2>&1 &
--mem-fraction-static 0.8 \
--disable-radix-cache $EVAL_CONTEXT_ARGS > $SERVER_LOG 2>&1 &

SERVER_PID=$!

Expand Down
3 changes: 2 additions & 1 deletion benchmarks/single_node/qwen3.5_bf16_mi325x.sh
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,8 @@ python3 -m sglang.launch_server \
--port $PORT \
--tensor-parallel-size $TP \
--trust-remote-code \
--mem-fraction-static 0.8 $EVAL_CONTEXT_ARGS > $SERVER_LOG 2>&1 &
--mem-fraction-static 0.8 \
--disable-radix-cache $EVAL_CONTEXT_ARGS > $SERVER_LOG 2>&1 &

SERVER_PID=$!

Expand Down
3 changes: 2 additions & 1 deletion benchmarks/single_node/qwen3.5_bf16_mi355x.sh
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,8 @@ python3 -m sglang.launch_server \
--port $PORT \
--tensor-parallel-size $TP \
--trust-remote-code \
--mem-fraction-static 0.8 $EVAL_CONTEXT_ARGS > $SERVER_LOG 2>&1 &
--mem-fraction-static 0.8 \
--disable-radix-cache $EVAL_CONTEXT_ARGS > $SERVER_LOG 2>&1 &

SERVER_PID=$!

Expand Down
3 changes: 2 additions & 1 deletion benchmarks/single_node/qwen3.5_fp8_mi300x.sh
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,8 @@ python3 -m sglang.launch_server \
--port $PORT \
--tensor-parallel-size $TP \
--trust-remote-code \
--mem-fraction-static 0.8 $EVAL_CONTEXT_ARGS > $SERVER_LOG 2>&1 &
--mem-fraction-static 0.8 \
--disable-radix-cache $EVAL_CONTEXT_ARGS > $SERVER_LOG 2>&1 &

SERVER_PID=$!

Expand Down
3 changes: 2 additions & 1 deletion benchmarks/single_node/qwen3.5_fp8_mi325x.sh
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,8 @@ python3 -m sglang.launch_server \
--port $PORT \
--tensor-parallel-size $TP \
--trust-remote-code \
--mem-fraction-static 0.8 $EVAL_CONTEXT_ARGS > $SERVER_LOG 2>&1 &
--mem-fraction-static 0.8 \
--disable-radix-cache $EVAL_CONTEXT_ARGS > $SERVER_LOG 2>&1 &

SERVER_PID=$!

Expand Down
3 changes: 2 additions & 1 deletion benchmarks/single_node/qwen3.5_fp8_mi355x.sh
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,8 @@ python3 -m sglang.launch_server \
--port $PORT \
--tensor-parallel-size $TP \
--trust-remote-code \
--mem-fraction-static 0.8 $EVAL_CONTEXT_ARGS > $SERVER_LOG 2>&1 &
--mem-fraction-static 0.8 \
--disable-radix-cache $EVAL_CONTEXT_ARGS > $SERVER_LOG 2>&1 &

SERVER_PID=$!

Expand Down