diff --git a/recipes/gb200-fp8/1k1k/low-latency-mtp.yaml b/recipes/gb200-fp8/1k1k/low-latency-mtp.yaml index 08df748c..f7cbe492 100644 --- a/recipes/gb200-fp8/1k1k/low-latency-mtp.yaml +++ b/recipes/gb200-fp8/1k1k/low-latency-mtp.yaml @@ -1,4 +1,4 @@ -name: "gb200-fp8-1p-2d-low-latency-mtp" +name: "gb200-fp8-1p-2d-low-latency-mtp3" frontend: nginx_container: nginx @@ -73,11 +73,11 @@ backend: watchdog-timeout: 1000000 context-length: 2200 disaggregation-mode: "prefill" - mem-fraction-static: 0.90 + mem-fraction-static: 0.95 max-total-tokens: 8192 chunked-prefill-size: 8192 cuda-graph-max-bs: 128 - max-running-requests: 512 + max-running-requests: 128 load-balance-method: "round_robin" scheduler-recv-interval: 10 enable-flashinfer-allreduce-fusion: true @@ -88,9 +88,9 @@ backend: expert-parallel-size: 1 disaggregation-transfer-backend: "nixl" speculative-algorithm: "EAGLE" - speculative-num-steps: 2 + speculative-num-steps: 3 speculative-eagle-topk: 1 - speculative-num-draft-tokens: 3 + speculative-num-draft-tokens: 4 decode: served-model-name: "deepseek-ai/DeepSeek-R1" @@ -118,9 +118,9 @@ backend: expert-parallel-size: 1 disaggregation-transfer-backend: "nixl" speculative-algorithm: "EAGLE" - speculative-num-steps: 2 + speculative-num-steps: 3 speculative-eagle-topk: 1 - speculative-num-draft-tokens: 3 + speculative-num-draft-tokens: 4 benchmark: type: "sa-bench" diff --git a/recipes/gb200-fp8/1k8k/low-latency-mtp.yaml b/recipes/gb200-fp8/1k8k/low-latency-mtp.yaml index bfde4fa4..0c5e18a3 100644 --- a/recipes/gb200-fp8/1k8k/low-latency-mtp.yaml +++ b/recipes/gb200-fp8/1k8k/low-latency-mtp.yaml @@ -1,4 +1,4 @@ -name: "gb200-fp8-1k8k-low-latency-mtp" +name: "gb200-fp8-1k8k-low-latency-mtp3" frontend: nginx_container: nginx @@ -86,9 +86,9 @@ backend: expert-parallel-size: 1 disaggregation-transfer-backend: "nixl" speculative-algorithm: "EAGLE" - speculative-num-steps: 2 + speculative-num-steps: 3 speculative-eagle-topk: 1 - speculative-num-draft-tokens: 3 + speculative-num-draft-tokens: 4 decode: served-model-name: "deepseek-ai/DeepSeek-R1" @@ -117,9 +117,9 @@ backend: expert-parallel-size: 1 disaggregation-transfer-backend: "nixl" speculative-algorithm: "EAGLE" - speculative-num-steps: 2 + speculative-num-steps: 3 speculative-eagle-topk: 1 - speculative-num-draft-tokens: 3 + speculative-num-draft-tokens: 4 benchmark: type: "sa-bench"