SemiAnalysisAI · Oseltamivir · Apr 27, 2026 · Apr 26, 2026 · Apr 26, 2026 · Apr 26, 2026
diff --git a/benchmarks/single_node/dsv4_fp4_b300_sglang_mtp.sh b/benchmarks/single_node/dsv4_fp4_b300_sglang_mtp.sh
@@ -128,6 +128,12 @@ wait_for_server_ready --port "$PORT" --server-log "$SERVER_LOG" --server-pid "$S
 
 pip install -q datasets pandas
 
+# --dsv4 routes prompts through encoding_dsv4.py (PR #1153), which emits the
+# <bos><User>...<Assistant><think> framing DeepSeek-V4-Pro expects. The DSv4-Pro
+# tokenizer ships without a Jinja chat_template, so plain --use-chat-template
+# would crash; --dsv4 sidesteps that and satisfies the AGENTS.md rule that all
+# MTP scripts must benchmark against chat-formatted inputs (EAGLE acceptance
+# silently regresses on raw random tokens).
 run_benchmark_serving \
     --model "$MODEL" \
     --port "$PORT" \
@@ -138,7 +144,8 @@ run_benchmark_serving \
     --num-prompts $((CONC * 10)) \
     --max-concurrency "$CONC" \
     --result-filename "$RESULT_FILENAME" \
-    --result-dir "$PWD/"
+    --result-dir "$PWD/" \
+    --dsv4
 
 if [ "${RUN_EVAL}" = "true" ]; then
     run_eval --framework lm-eval --port "$PORT"

diff --git a/perf-changelog.yaml b/perf-changelog.yaml
@@ -1903,3 +1903,11 @@
     - "ISL=8192: TP4 conc 4-64; DP4 (dp-attn) conc 128-1024; DP8 (dp-attn) conc 1024-8192"
   pr-link: https://github.com/SemiAnalysisAI/InferenceX/pull/1155
 
+- config-keys:
+    - dsv4-fp4-b300-sglang-mtp
+  description:
+    - "Pass --dsv4 (PR #1153) to run_benchmark_serving so prompts go through the DeepSeek-V4 chat template (encoding_dsv4.py)"
+    - "Restores AGENTS.md compliance: every MTP script must benchmark against chat-formatted inputs. PR #1166 had to drop --use-chat-template because the DSv4-Pro tokenizer has no Jinja chat template; --dsv4 sidesteps that limitation"
+    - "EAGLE acceptance rate is sensitive to input distribution; raw random tokens silently regress acceptance vs. chat-framed prompts"
+  pr-link: https://github.com/SemiAnalysisAI/InferenceX/pull/XXX
+