Skip to content
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 8 additions & 1 deletion benchmarks/single_node/dsv4_fp4_b300_sglang_mtp.sh
Original file line number Diff line number Diff line change
Expand Up @@ -128,6 +128,12 @@ wait_for_server_ready --port "$PORT" --server-log "$SERVER_LOG" --server-pid "$S

pip install -q datasets pandas

# --dsv4 routes prompts through encoding_dsv4.py (PR #1153), which emits the
# <bos><User>...<Assistant><think> framing DeepSeek-V4-Pro expects. The DSv4-Pro
# tokenizer ships without a jinja chat_template, so plain --use-chat-template
# would crash; --dsv4 sidesteps that and satisfies the AGENTS.md rule that all
# MTP scripts must benchmark against chat-formatted inputs (EAGLE acceptance
# silently regresses on raw random tokens).
run_benchmark_serving \
--model "$MODEL" \
--port "$PORT" \
Expand All @@ -138,7 +144,8 @@ run_benchmark_serving \
--num-prompts $((CONC * 10)) \
--max-concurrency "$CONC" \
--result-filename "$RESULT_FILENAME" \
--result-dir "$PWD/"
--result-dir "$PWD/" \
--dsv4

if [ "${RUN_EVAL}" = "true" ]; then
run_eval --framework lm-eval --port "$PORT"
Expand Down
8 changes: 8 additions & 0 deletions perf-changelog.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -1903,3 +1903,11 @@
- "ISL=8192: TP4 conc 4-64; DP4 (dp-attn) conc 128-1024; DP8 (dp-attn) conc 1024-8192"
pr-link: https://github.com/SemiAnalysisAI/InferenceX/pull/1155

- config-keys:
- dsv4-fp4-b300-sglang-mtp
description:
- "Pass --dsv4 (PR #1153) to run_benchmark_serving so prompts go through the DeepSeek-V4 chat template (encoding_dsv4.py)"
- "Restores AGENTS.md compliance (every MTP script must benchmark against chat-formatted inputs). PR #1166 had to drop --use-chat-template because the DSv4-Pro tokenizer has no jinja template; --dsv4 avoids that dependency by formatting prompts in encoding_dsv4.py instead"
- "EAGLE acceptance rate is sensitive to input distribution; raw random tokens silently regress acceptance vs. chat-framed prompts"
pr-link: https://github.com/SemiAnalysisAI/InferenceX/pull/XXX

Loading