diff --git a/benchmarks/single_node/dsv4_fp8_h200_mtp.sh b/benchmarks/single_node/dsv4_fp8_h200_mtp.sh index 5a6834757..ef0a0a8ad 100755 --- a/benchmarks/single_node/dsv4_fp8_h200_mtp.sh +++ b/benchmarks/single_node/dsv4_fp8_h200_mtp.sh @@ -2,7 +2,7 @@ # DeepSeek-V4-Pro H200 vLLM MTP variant of the recipe at # https://vllm.ai/blog/deepseek-v4. Mirrors dsv4_fp8_h200.sh but adds -# --speculative-config '{"method":"mtp","num_speculative_tokens":1}' and +# --speculative-config '{"method":"mtp","num_speculative_tokens":2}' and # routes prompts through chat-formatted encoding via --dsv4 (required for # meaningful MTP acceptance numbers per AGENTS.md). @@ -65,7 +65,7 @@ $MAX_MODEL_LEN_ARG \ --max-num-batched-tokens 512 \ --no-enable-flashinfer-autotune \ --compilation-config '{"mode":0,"cudagraph_mode":"FULL_DECODE_ONLY"}' \ ---speculative-config '{"method":"mtp","num_speculative_tokens":1}' \ +--speculative-config '{"method":"mtp","num_speculative_tokens":2}' \ --tokenizer-mode deepseek_v4 \ --tool-call-parser deepseek_v4 \ --enable-auto-tool-choice \ diff --git a/perf-changelog.yaml b/perf-changelog.yaml index 98fa4e8b3..b04ae1947 100644 --- a/perf-changelog.yaml +++ b/perf-changelog.yaml @@ -2207,3 +2207,10 @@ - "run_benchmark_serving uses --dsv4 (chat-formatted prompts) per the AGENTS.md MTP rule, since EAGLE-style speculative decoding regresses acceptance on raw random tokens" - "Search space mirrors the non-MTP H200 entry: TP=8, EP=8, DP-attn=true, CONC 4-64 for both 1k1k and 8k1k, with spec-decoding: mtp" pr-link: https://github.com/SemiAnalysisAI/InferenceX/pull/1222 + +- config-keys: + - dsv4-fp8-h200-vllm-mtp + description: + - "Bump --speculative-config num_speculative_tokens from 1 to 2 (`{\"method\":\"mtp\",\"num_speculative_tokens\":2}`)" + - "Re-test whether H200 MTP kernels accept 2 draft tokens — Blackwell MTP runs at 2 (per @wzhao18's vLLM Blackwell MTP submission); checking if H200 has parity now" + pr-link: https://github.com/SemiAnalysisAI/InferenceX/pull/1279