From 773ef8894a0d78a463dee8e9b271c07c20ba90d0 Mon Sep 17 00:00:00 2001 From: Grace Ho Date: Mon, 2 Feb 2026 14:15:32 -0800 Subject: [PATCH 1/4] add nginx frontend containers --- recipes/gb300-fp8/1k8k/stp/low-latency.yaml | 3 +++ recipes/gb300-fp8/1k8k/stp/max.yaml | 3 +++ recipes/gb300-fp8/1k8k/stp/mid.yaml | 3 +++ 3 files changed, 9 insertions(+) diff --git a/recipes/gb300-fp8/1k8k/stp/low-latency.yaml b/recipes/gb300-fp8/1k8k/stp/low-latency.yaml index b1e301a6..40e7112c 100644 --- a/recipes/gb300-fp8/1k8k/stp/low-latency.yaml +++ b/recipes/gb300-fp8/1k8k/stp/low-latency.yaml @@ -5,6 +5,9 @@ model: container: "lmsysorg/sglang:v0.5.8-cu130-runtime" precision: "fp8" +frontend: + nginx_container: nginx + resources: gpu_type: "gb300" prefill_nodes: 1 diff --git a/recipes/gb300-fp8/1k8k/stp/max.yaml b/recipes/gb300-fp8/1k8k/stp/max.yaml index 8de3e2b6..83063029 100644 --- a/recipes/gb300-fp8/1k8k/stp/max.yaml +++ b/recipes/gb300-fp8/1k8k/stp/max.yaml @@ -7,6 +7,9 @@ model: container: "lmsysorg/sglang:v0.5.8-cu130-runtime" precision: "fp8" +frontend: + nginx_container: nginx + resources: gpu_type: "gb300" prefill_nodes: 2 diff --git a/recipes/gb300-fp8/1k8k/stp/mid.yaml b/recipes/gb300-fp8/1k8k/stp/mid.yaml index 99068a97..7b30f95a 100644 --- a/recipes/gb300-fp8/1k8k/stp/mid.yaml +++ b/recipes/gb300-fp8/1k8k/stp/mid.yaml @@ -7,6 +7,9 @@ model: container: "lmsysorg/sglang:v0.5.8-cu130-runtime" precision: "fp8" +frontend: + nginx_container: nginx + resources: gpu_type: "gb300" prefill_nodes: 2 From a3e27978f4ba38f949e87e9a720e5d314649ee85 Mon Sep 17 00:00:00 2001 From: ishandhanani Date: Mon, 2 Feb 2026 14:19:11 -0800 Subject: [PATCH 2/4] lustre --- recipes/gb300-fp8/8k1k/stp/max.yaml | 2 -- 1 file changed, 2 deletions(-) diff --git a/recipes/gb300-fp8/8k1k/stp/max.yaml b/recipes/gb300-fp8/8k1k/stp/max.yaml index ec102ed6..826cb190 100644 --- a/recipes/gb300-fp8/8k1k/stp/max.yaml +++ b/recipes/gb300-fp8/8k1k/stp/max.yaml @@ -10,8 +10,6 @@ model: frontend: nginx_container: nginx -extra_mount: # add this if you need to mount extra directories to the container - - "/lustre:/lustre" resources: gpu_type: "gb300" prefill_nodes: 12 From 568816ed97e2dc2b1f3c1039f59737eea0ae2613 Mon Sep 17 00:00:00 2001 From: ishandhanani Date: Mon, 2 Feb 2026 14:20:34 -0800 Subject: [PATCH 3/4] lustre --- recipes/gb300-fp8/1k1k/stp/low-latency.yaml | 3 --- recipes/gb300-fp8/1k1k/stp/max.yaml | 2 -- recipes/gb300-fp8/1k1k/stp/mid.yaml | 2 -- recipes/gb300-fp8/8k1k/stp/low-latency.yaml | 3 --- recipes/gb300-fp8/8k1k/stp/mid.yaml | 2 -- 5 files changed, 12 deletions(-) diff --git a/recipes/gb300-fp8/1k1k/stp/low-latency.yaml b/recipes/gb300-fp8/1k1k/stp/low-latency.yaml index 57045ec9..c6c0df8a 100644 --- a/recipes/gb300-fp8/1k1k/stp/low-latency.yaml +++ b/recipes/gb300-fp8/1k1k/stp/low-latency.yaml @@ -8,9 +8,6 @@ model: frontend: nginx_container: nginx -extra_mount: # add this if you need to mount extra directories to the container - - "/lustre:/lustre" - resources: gpu_type: "gb300" prefill_nodes: 1 diff --git a/recipes/gb300-fp8/1k1k/stp/max.yaml b/recipes/gb300-fp8/1k1k/stp/max.yaml index 0836c13f..34767372 100644 --- a/recipes/gb300-fp8/1k1k/stp/max.yaml +++ b/recipes/gb300-fp8/1k1k/stp/max.yaml @@ -10,8 +10,6 @@ model: frontend: nginx_container: nginx -extra_mount: # add this if you need to mount extra directories to the container - - "/lustre:/lustre" resources: gpu_type: "gb300" prefill_nodes: 2 diff --git a/recipes/gb300-fp8/1k1k/stp/mid.yaml b/recipes/gb300-fp8/1k1k/stp/mid.yaml index 4a15b5f5..13df99c3 100644 --- a/recipes/gb300-fp8/1k1k/stp/mid.yaml +++ b/recipes/gb300-fp8/1k1k/stp/mid.yaml @@ -9,8 +9,6 @@ model: frontend: nginx_container: nginx -extra_mount: # add this if you need to mount extra directories to the container - - "/lustre:/lustre" resources: gpu_type: "gb300" prefill_nodes: 4 diff --git a/recipes/gb300-fp8/8k1k/stp/low-latency.yaml b/recipes/gb300-fp8/8k1k/stp/low-latency.yaml index 088e6517..501ec759 100644 --- a/recipes/gb300-fp8/8k1k/stp/low-latency.yaml +++ b/recipes/gb300-fp8/8k1k/stp/low-latency.yaml @@ -8,9 +8,6 @@ model: frontend: nginx_container: nginx -extra_mount: # add this if you need to mount extra directories to the container - - "/lustre:/lustre" - resources: gpu_type: "gb300" prefill_nodes: 1 diff --git a/recipes/gb300-fp8/8k1k/stp/mid.yaml b/recipes/gb300-fp8/8k1k/stp/mid.yaml index a9bef3d9..49df8abe 100644 --- a/recipes/gb300-fp8/8k1k/stp/mid.yaml +++ b/recipes/gb300-fp8/8k1k/stp/mid.yaml @@ -10,8 +10,6 @@ model: frontend: nginx_container: nginx -extra_mount: # add this if you need to mount extra directories to the container - - "/lustre:/lustre" resources: gpu_type: "gb300" prefill_nodes: 10 From a6a2afa83e2f3c0a05b3b3860191403fc81fbe15 Mon Sep 17 00:00:00 2001 From: Grace Ho Date: Mon, 2 Feb 2026 16:51:17 -0800 Subject: [PATCH 4/4] modify points to put back TTFT>5 points --- recipes/gb300-fp8/1k8k/stp/max.yaml | 2 +- recipes/gb300-fp8/8k1k/stp/max.yaml | 2 +- recipes/gb300-fp8/8k1k/stp/mid.yaml | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/recipes/gb300-fp8/1k8k/stp/max.yaml b/recipes/gb300-fp8/1k8k/stp/max.yaml index 83063029..34d2d7fd 100644 --- a/recipes/gb300-fp8/1k8k/stp/max.yaml +++ b/recipes/gb300-fp8/1k8k/stp/max.yaml @@ -169,6 +169,6 @@ benchmark: type: "sa-bench" isl: 1024 osl: 8192 - concurrencies: [8192] + concurrencies: [8192,10240] req_rate: "inf" diff --git a/recipes/gb300-fp8/8k1k/stp/max.yaml b/recipes/gb300-fp8/8k1k/stp/max.yaml index 826cb190..1e3ff57e 100644 --- a/recipes/gb300-fp8/8k1k/stp/max.yaml +++ b/recipes/gb300-fp8/8k1k/stp/max.yaml @@ -166,6 +166,6 @@ benchmark: type: "sa-bench" isl: 8192 osl: 1024 - concurrencies: [2048,4096,10240] + concurrencies: [2048,4096] req_rate: "inf" diff --git a/recipes/gb300-fp8/8k1k/stp/mid.yaml b/recipes/gb300-fp8/8k1k/stp/mid.yaml index 49df8abe..33d13fab 100644 --- a/recipes/gb300-fp8/8k1k/stp/mid.yaml +++ b/recipes/gb300-fp8/8k1k/stp/mid.yaml @@ -166,6 +166,6 @@ benchmark: type: "sa-bench" isl: 8192 osl: 1024 - concurrencies: [128,256,512,1024,2048,4096] + concurrencies: [128,256,512,1024] req_rate: "inf"