diff --git a/docs_new/src/snippets/autoregressive/mimo-v25-deployment.jsx b/docs_new/src/snippets/autoregressive/mimo-v25-deployment.jsx
index b0261afd711d..31b0fcad9de6 100644
--- a/docs_new/src/snippets/autoregressive/mimo-v25-deployment.jsx
+++ b/docs_new/src/snippets/autoregressive/mimo-v25-deployment.jsx
@@ -230,6 +230,12 @@ export const MiMoV25Deployment = () => {
       // Recipe sources:
       //   v7x: tp=ep=32, dp=4, omits --attention-backend, mem-frac 0.95, swa 0.25
       //   v6e: tp=ep=64, dp=8, --attention-backend fa,    mem-frac 0.92, swa 0.15
+      //
+      // sgl-jax conventions:
+      //   - `--tp-size` is always the total JAX device count; per-DP TP is
+      //     derived automatically as tp/dp.
+      //   - No `--enable-dp-attention` flag — DP attention is the default
+      //     (FFN layers auto-pick EP-split for MoE, attn-TP-split for dense).
       const isV7x = hardware === "tpu-v7x";
       const useEp = expertParallelism === "enabled";
       const useDpAttn = dpAttention === "enabled";
@@ -239,7 +245,7 @@ export const MiMoV25Deployment = () => {
       flags.push("  --trust-remote-code");
       flags.push(`  --tp-size ${tp}`);
       if (useEp) flags.push(`  --ep-size ${tp}`);
-      if (useDpAttn) flags.push(`  --dp-size ${dpSize}`, "  --enable-dp-attention");
+      if (useDpAttn) flags.push(`  --dp-size ${dpSize}`);
       flags.push("  --moe-backend fused");
       if (!isV7x) flags.push("  --attention-backend fa");
       flags.push("  --host 0.0.0.0");