diff --git a/recipes/gb300-fp8/1k1k/stp/low-latency.yaml b/recipes/gb300-fp8/1k1k/stp/low-latency.yaml index 57045ec9..c6c0df8a 100644 --- a/recipes/gb300-fp8/1k1k/stp/low-latency.yaml +++ b/recipes/gb300-fp8/1k1k/stp/low-latency.yaml @@ -8,9 +8,6 @@ model: frontend: nginx_container: nginx -extra_mount: # add this if you need to mount extra directories to the container - - "/lustre:/lustre" - resources: gpu_type: "gb300" prefill_nodes: 1 diff --git a/recipes/gb300-fp8/1k1k/stp/max.yaml b/recipes/gb300-fp8/1k1k/stp/max.yaml index 0836c13f..34767372 100644 --- a/recipes/gb300-fp8/1k1k/stp/max.yaml +++ b/recipes/gb300-fp8/1k1k/stp/max.yaml @@ -10,8 +10,6 @@ model: frontend: nginx_container: nginx -extra_mount: # add this if you need to mount extra directories to the container - - "/lustre:/lustre" resources: gpu_type: "gb300" prefill_nodes: 2 diff --git a/recipes/gb300-fp8/1k1k/stp/mid.yaml b/recipes/gb300-fp8/1k1k/stp/mid.yaml index 4a15b5f5..13df99c3 100644 --- a/recipes/gb300-fp8/1k1k/stp/mid.yaml +++ b/recipes/gb300-fp8/1k1k/stp/mid.yaml @@ -9,8 +9,6 @@ model: frontend: nginx_container: nginx -extra_mount: # add this if you need to mount extra directories to the container - - "/lustre:/lustre" resources: gpu_type: "gb300" prefill_nodes: 4 diff --git a/recipes/gb300-fp8/1k8k/stp/low-latency.yaml b/recipes/gb300-fp8/1k8k/stp/low-latency.yaml index b1e301a6..40e7112c 100644 --- a/recipes/gb300-fp8/1k8k/stp/low-latency.yaml +++ b/recipes/gb300-fp8/1k8k/stp/low-latency.yaml @@ -5,6 +5,9 @@ model: container: "lmsysorg/sglang:v0.5.8-cu130-runtime" precision: "fp8" +frontend: + nginx_container: nginx + resources: gpu_type: "gb300" prefill_nodes: 1 diff --git a/recipes/gb300-fp8/1k8k/stp/max.yaml b/recipes/gb300-fp8/1k8k/stp/max.yaml index 8de3e2b6..34d2d7fd 100644 --- a/recipes/gb300-fp8/1k8k/stp/max.yaml +++ b/recipes/gb300-fp8/1k8k/stp/max.yaml @@ -7,6 +7,9 @@ model: container: "lmsysorg/sglang:v0.5.8-cu130-runtime" precision: "fp8" +frontend: + nginx_container: nginx + resources: gpu_type: "gb300" prefill_nodes: 2 @@ -166,6 +169,6 @@ benchmark: type: "sa-bench" isl: 1024 osl: 8192 - concurrencies: [8192] + concurrencies: [8192,10240] req_rate: "inf" diff --git a/recipes/gb300-fp8/1k8k/stp/mid.yaml b/recipes/gb300-fp8/1k8k/stp/mid.yaml index 99068a97..7b30f95a 100644 --- a/recipes/gb300-fp8/1k8k/stp/mid.yaml +++ b/recipes/gb300-fp8/1k8k/stp/mid.yaml @@ -7,6 +7,9 @@ model: container: "lmsysorg/sglang:v0.5.8-cu130-runtime" precision: "fp8" +frontend: + nginx_container: nginx + resources: gpu_type: "gb300" prefill_nodes: 2 diff --git a/recipes/gb300-fp8/8k1k/stp/low-latency.yaml b/recipes/gb300-fp8/8k1k/stp/low-latency.yaml index 088e6517..501ec759 100644 --- a/recipes/gb300-fp8/8k1k/stp/low-latency.yaml +++ b/recipes/gb300-fp8/8k1k/stp/low-latency.yaml @@ -8,9 +8,6 @@ model: frontend: nginx_container: nginx -extra_mount: # add this if you need to mount extra directories to the container - - "/lustre:/lustre" - resources: gpu_type: "gb300" prefill_nodes: 1 diff --git a/recipes/gb300-fp8/8k1k/stp/max.yaml b/recipes/gb300-fp8/8k1k/stp/max.yaml index ec102ed6..1e3ff57e 100644 --- a/recipes/gb300-fp8/8k1k/stp/max.yaml +++ b/recipes/gb300-fp8/8k1k/stp/max.yaml @@ -10,8 +10,6 @@ model: frontend: nginx_container: nginx -extra_mount: # add this if you need to mount extra directories to the container - - "/lustre:/lustre" resources: gpu_type: "gb300" prefill_nodes: 12 @@ -168,6 +166,6 @@ benchmark: type: "sa-bench" isl: 8192 osl: 1024 - concurrencies: [2048,4096,10240] + concurrencies: [2048,4096] req_rate: "inf" diff --git a/recipes/gb300-fp8/8k1k/stp/mid.yaml b/recipes/gb300-fp8/8k1k/stp/mid.yaml index a9bef3d9..33d13fab 100644 --- a/recipes/gb300-fp8/8k1k/stp/mid.yaml +++ b/recipes/gb300-fp8/8k1k/stp/mid.yaml @@ -10,8 +10,6 @@ model: frontend: nginx_container: nginx -extra_mount: # add this if you need to mount extra directories to the container - - "/lustre:/lustre" resources: gpu_type: "gb300" prefill_nodes: 10 @@ -168,6 +166,6 @@ benchmark: type: "sa-bench" isl: 8192 osl: 1024 - concurrencies: [128,256,512,1024,2048,4096] + concurrencies: [128,256,512,1024] req_rate: "inf"