diff --git a/recipes/gb300-fp4/1k1k/low_latency.yaml b/recipes/gb300-fp4/1k1k/low_latency.yaml new file mode 100644 index 00000000..3ec9dc7d --- /dev/null +++ b/recipes/gb300-fp4/1k1k/low_latency.yaml @@ -0,0 +1,118 @@ +name: "gb300-fp4-low-latency-1k1k" + +dynamo: + version: 0.8.1 + +frontend: + type: dynamo + enable_multiple_frontends: true + num_additional_frontends: 4 + nginx_container: nginx + +model: + path: "dsfp4" + container: "lmsysorg/sglang:v0.5.8-cu130-runtime" + precision: "fp4" + +resources: + gpu_type: "gb300" + prefill_nodes: 1 + decode_nodes: 2 + prefill_workers: 1 + decode_workers: 2 + gpus_per_node: 4 + +backend: + + prefill_environment: + TORCH_DISTRIBUTED_DEFAULT_TIMEOUT: "1800" + PYTHONUNBUFFERED: "1" + DYN_SKIP_SGLANG_LOG_FORMATTING: "1" + SGLANG_USE_MESSAGE_QUEUE_BROADCASTER: "0" + SGLANG_DISABLE_TP_MEMORY_INBALANCE_CHECK: "1" + SGLANG_DISAGGREGATION_HEARTBEAT_MAX_FAILURE: "100000" + SGLANG_DISAGGREGATION_BOOTSTRAP_TIMEOUT: "100000" + SGLANG_DISAGGREGATION_WAITING_TIMEOUT: "100000" + SGLANG_DECODE_BOOTSTRAP_TIMEOUT: "1000" + MC_FORCE_MNNVL: "1" + NCCL_MNNVL_ENABLE: "1" + NCCL_CUMEM_ENABLE: "1" + SGLANG_MOONCAKE_CUSTOM_MEM_POOL: "True" + SGLANG_ENABLE_JIT_DEEPGEMM: "false" + + decode_environment: + TORCH_DISTRIBUTED_DEFAULT_TIMEOUT: "1800" + PYTHONUNBUFFERED: "1" + DYN_SKIP_SGLANG_LOG_FORMATTING: "1" + SGLANG_USE_MESSAGE_QUEUE_BROADCASTER: "0" + SGLANG_DISABLE_TP_MEMORY_INBALANCE_CHECK: "1" + SGLANG_DISAGGREGATION_HEARTBEAT_MAX_FAILURE: "100000" + SGLANG_DISAGGREGATION_BOOTSTRAP_TIMEOUT: "100000" + SGLANG_DISAGGREGATION_WAITING_TIMEOUT: "100000" + SGLANG_DECODE_BOOTSTRAP_TIMEOUT: "1000" + MC_FORCE_MNNVL: "1" + NCCL_MNNVL_ENABLE: "1" + NCCL_CUMEM_ENABLE: "1" + SGLANG_MOONCAKE_CUSTOM_MEM_POOL: "True" + SGLANG_ENABLE_JIT_DEEPGEMM: "false" + + sglang_config: + prefill: + disaggregation-mode: "prefill" + served-model-name: "deepseek-ai/DeepSeek-R1" + trust-remote-code: true + disable-radix-cache: true + kv-cache-dtype: "fp8_e4m3" + 
attention-backend: "trtllm_mla" + quantization: "modelopt_fp4" + moe-runner-backend: "flashinfer_trtllm" + stream-interval: 10 + watchdog-timeout: 1000000 + context-length: 2200 + mem-fraction-static: 0.95 + max-total-tokens: 8192 + chunked-prefill-size: 8192 + cuda-graph-max-bs: 256 + max-running-requests: 512 + scheduler-recv-interval: 10 + enable-symm-mem: true + moe-dense-tp-size: 1 + load-balance-method: "round_robin" + disaggregation-bootstrap-port: 30001 + data-parallel-size: 1 + tensor-parallel-size: 4 + expert-parallel-size: 1 + fp4-gemm-backend: "flashinfer_trtllm" + disaggregation-transfer-backend: nixl + + decode: + disaggregation-mode: "decode" + served-model-name: "deepseek-ai/DeepSeek-R1" + prefill-round-robin-balance: true + trust-remote-code: true + disable-radix-cache: true + kv-cache-dtype: "fp8_e4m3" + attention-backend: "trtllm_mla" + quantization: "modelopt_fp4" + moe-runner-backend: "flashinfer_trtllm" + disaggregation-bootstrap-port: 30001 + stream-interval: 10 + watchdog-timeout: 1000000 + context-length: 2200 + mem-fraction-static: 0.95 + chunked-prefill-size: 8192 + cuda-graph-max-bs: 256 + scheduler-recv-interval: 10 + enable-symm-mem: true + moe-dense-tp-size: 1 + tensor-parallel-size: 4 + expert-parallel-size: 1 + fp4-gemm-backend: "flashinfer_trtllm" + disaggregation-transfer-backend: nixl + +benchmark: + type: "sa-bench" + isl: 1024 + osl: 1024 + concurrencies: "4x8x32" + req_rate: "inf" diff --git a/recipes/gb300-fp4/1k1k/max_tpt.yaml b/recipes/gb300-fp4/1k1k/max_tpt.yaml new file mode 100644 index 00000000..e762725c --- /dev/null +++ b/recipes/gb300-fp4/1k1k/max_tpt.yaml @@ -0,0 +1,184 @@ +name: "gb300-fp4-max-tpt-1k1k" + +dynamo: + version: 0.8.1 + +frontend: + type: dynamo + enable_multiple_frontends: true + num_additional_frontends: 9 + nginx_container: nginx + +model: + path: "dsfp4" + container: "lmsysorg/sglang:v0.5.8-cu130-runtime" + precision: "fp4" + +resources: + gpu_type: "gb300" + prefill_nodes: 4 + decode_nodes: 12 + 
prefill_workers: 4 + decode_workers: 1 + gpus_per_node: 4 + +backend: + + # Prefill-specific environment variables + prefill_environment: + TORCH_DISTRIBUTED_DEFAULT_TIMEOUT: "1800" + PYTHONUNBUFFERED: "1" + DYN_SKIP_SGLANG_LOG_FORMATTING: "1" + SGLANG_NVFP4_CKPT_FP8_GEMM_IN_ATTN: "1" + SGLANG_PER_TOKEN_GROUP_QUANT_8BIT_V2: "1" + SGLANG_DISAGGREGATION_HEARTBEAT_MAX_FAILURE: "100000" + SGLANG_DISAGGREGATION_BOOTSTRAP_TIMEOUT: "100000" + SGLANG_DISAGGREGATION_WAITING_TIMEOUT: "100000" + MC_TE_METRIC: "true" + MC_FORCE_MNNVL: "1" + NCCL_MNNVL_ENABLE: "1" + NCCL_CUMEM_ENABLE: "1" + SGLANG_MOONCAKE_CUSTOM_MEM_POOL: "True" + SGLANG_USE_MESSAGE_QUEUE_BROADCASTER: "0" + SGLANG_DISABLE_TP_MEMORY_INBALANCE_CHECK: "1" + + # Decode-specific environment variables + decode_environment: + TORCH_DISTRIBUTED_DEFAULT_TIMEOUT: "1800" + PYTHONUNBUFFERED: "1" + DYN_SKIP_SGLANG_LOG_FORMATTING: "1" + SGLANG_NVFP4_CKPT_FP8_GEMM_IN_ATTN: "1" + SGLANG_PER_TOKEN_GROUP_QUANT_8BIT_V2: "1" + SGLANG_DISAGGREGATION_HEARTBEAT_MAX_FAILURE: "100000" + SGLANG_DISAGGREGATION_BOOTSTRAP_TIMEOUT: "100000" + SGLANG_DISAGGREGATION_WAITING_TIMEOUT: "100000" + MC_TE_METRIC: "true" + MC_FORCE_MNNVL: "1" + NCCL_MNNVL_ENABLE: "1" + NCCL_CUMEM_ENABLE: "1" + SGLANG_MOONCAKE_CUSTOM_MEM_POOL: "True" + SGLANG_USE_MESSAGE_QUEUE_BROADCASTER: "0" + SGLANG_DISABLE_TP_MEMORY_INBALANCE_CHECK: "1" + SGLANG_DEEPEP_NUM_MAX_DISPATCH_TOKENS_PER_RANK: "1024" + SGLANG_MOE_NVFP4_DISPATCH: "1" + + sglang_config: + prefill: + # Model configuration + served-model-name: "deepseek-ai/DeepSeek-R1" + trust-remote-code: true + + # KV cache and attention + kv-cache-dtype: "fp8_e4m3" + attention-backend: "trtllm_mla" + + # Quantization + quantization: "modelopt_fp4" + moe-runner-backend: "flashinfer_cutlass" + + # Radix cache disabled + disable-radix-cache: true + disable-chunked-prefix-cache: true + + # Other flags + stream-interval: 50 + decode-log-interval: 1000 + watchdog-timeout: 1000000 + context-length: 2176 + 
disable-shared-experts-fusion: true + eplb-algorithm: "deepseek" + disaggregation-bootstrap-port: 30001 + + # Prefill-specific mode + disaggregation-mode: "prefill" + + # Memory and token limits + mem-fraction-static: 0.84 + max-total-tokens: 131072 + max-prefill-tokens: 32768 + chunked-prefill-size: 65536 + enable-single-batch-overlap: true + + # Request handling + max-running-requests: 30000 + load-balance-method: "round_robin" + + # Performance optimizations + disable-cuda-graph: true + enable-dp-attention: true + disaggregation-transfer-backend: nixl + fp4-gemm-backend: "flashinfer_cutlass" + + # Parallelism + tp-size: 4 + dp-size: 4 + ep-size: 4 + + decode: + # Model configuration + served-model-name: "deepseek-ai/DeepSeek-R1" + trust-remote-code: true + + # KV cache and attention + kv-cache-dtype: "fp8_e4m3" + attention-backend: "trtllm_mla" + + # Quantization + quantization: "modelopt_fp4" + moe-runner-backend: "flashinfer_cutedsl" + + # Radix cache disabled + disable-radix-cache: true + disable-chunked-prefix-cache: true + + # Other flags + stream-interval: 50 + decode-log-interval: 1000 + watchdog-timeout: 1000000 + context-length: 2176 + disable-shared-experts-fusion: true + eplb-algorithm: "deepseek" + disaggregation-bootstrap-port: 30001 + + # Decode-specific mode + disaggregation-mode: "decode" + + # Memory and token limits + mem-fraction-static: 0.83 + max-total-tokens: 3122380 + chunked-prefill-size: 786432 + + # Request handling + max-running-requests: 67584 + enable-single-batch-overlap: true + + # DeepEP configuration + moe-a2a-backend: "deepep" + deepep-mode: "low_latency" + ep-dispatch-algorithm: "static" + ep-num-redundant-experts: 32 + + # CUDA graphs (extensive batch size list) + cuda-graph-bs: [1, 2, 4, 8, 16, 24, 32, 40, 48, 56, 64, 72, 80, 88, 96, 104, 112, 120, 128, 136, 144, 152, 160, 168, 176, 184, 192, 200, 208, 216, 224, 232, 240, 248, 256, 264, 272, 280, 288, 296, 304, 312, 320, 328, 336, 344, 352, 360, 368, 376, 384, 416, 448, 480, 
512, 544, 576, 608, 640, 672, 704, 736, 768, 1024] + num-reserved-decode-tokens: 112 + + # Additional decode optimizations + moe-dense-tp-size: 1 + enable-dp-lm-head: true + prefill-round-robin-balance: true + enable-dp-attention: true + fp4-gemm-backend: "flashinfer_cutlass" + disaggregation-transfer-backend: nixl + + + # Parallelism + tp-size: 48 + dp-size: 48 + ep-size: 48 + +benchmark: + type: "sa-bench" + isl: 1024 + osl: 1024 + concurrencies: "512x2048x4096x8192" + req_rate: "inf" diff --git a/recipes/gb300-fp4/1k1k/mid_curve.yaml b/recipes/gb300-fp4/1k1k/mid_curve.yaml new file mode 100644 index 00000000..e7a0abba --- /dev/null +++ b/recipes/gb300-fp4/1k1k/mid_curve.yaml @@ -0,0 +1,182 @@ +name: "gb300-fp4-mid-curve-1k1k" + +dynamo: + version: 0.8.1 + +frontend: + type: dynamo + enable_multiple_frontends: true + num_additional_frontends: 9 + nginx_container: nginx + +model: + path: "dsfp4" + container: "lmsysorg/sglang:v0.5.8-cu130-runtime" + precision: "fp4" + +resources: + gpu_type: "gb300" + prefill_nodes: 4 + decode_nodes: 8 + prefill_workers: 4 + decode_workers: 1 + gpus_per_node: 4 + +backend: + + # Prefill-specific environment variables + prefill_environment: + TORCH_DISTRIBUTED_DEFAULT_TIMEOUT: "1800" + PYTHONUNBUFFERED: "1" + DYN_SKIP_SGLANG_LOG_FORMATTING: "1" + SGLANG_NVFP4_CKPT_FP8_GEMM_IN_ATTN: "1" + SGLANG_PER_TOKEN_GROUP_QUANT_8BIT_V2: "1" + SGLANG_DISAGGREGATION_HEARTBEAT_MAX_FAILURE: "100000" + SGLANG_DISAGGREGATION_BOOTSTRAP_TIMEOUT: "100000" + SGLANG_DISAGGREGATION_WAITING_TIMEOUT: "100000" + MC_TE_METRIC: "true" + MC_FORCE_MNNVL: "1" + NCCL_MNNVL_ENABLE: "1" + NCCL_CUMEM_ENABLE: "1" + SGLANG_MOONCAKE_CUSTOM_MEM_POOL: "True" + SGLANG_USE_MESSAGE_QUEUE_BROADCASTER: "0" + SGLANG_DISABLE_TP_MEMORY_INBALANCE_CHECK: "1" + + # Decode-specific environment variables + decode_environment: + TORCH_DISTRIBUTED_DEFAULT_TIMEOUT: "1800" + PYTHONUNBUFFERED: "1" + DYN_SKIP_SGLANG_LOG_FORMATTING: "1" + SGLANG_NVFP4_CKPT_FP8_GEMM_IN_ATTN: "1" + 
SGLANG_PER_TOKEN_GROUP_QUANT_8BIT_V2: "1" + SGLANG_DISAGGREGATION_HEARTBEAT_MAX_FAILURE: "100000" + SGLANG_DISAGGREGATION_BOOTSTRAP_TIMEOUT: "100000" + SGLANG_DISAGGREGATION_WAITING_TIMEOUT: "100000" + MC_TE_METRIC: "true" + MC_FORCE_MNNVL: "1" + NCCL_MNNVL_ENABLE: "1" + NCCL_CUMEM_ENABLE: "1" + SGLANG_MOONCAKE_CUSTOM_MEM_POOL: "True" + SGLANG_USE_MESSAGE_QUEUE_BROADCASTER: "0" + SGLANG_DISABLE_TP_MEMORY_INBALANCE_CHECK: "1" + SGLANG_DEEPEP_NUM_MAX_DISPATCH_TOKENS_PER_RANK: "1024" + SGLANG_MOE_NVFP4_DISPATCH: "1" + + sglang_config: + prefill: + # Model configuration + served-model-name: "deepseek-ai/DeepSeek-R1" + trust-remote-code: true + + # KV cache and attention + kv-cache-dtype: "fp8_e4m3" + attention-backend: "trtllm_mla" + + # Quantization + quantization: "modelopt_fp4" + moe-runner-backend: "flashinfer_cutlass" + + # Radix cache disabled + disable-radix-cache: true + disable-chunked-prefix-cache: true + + # Other flags + stream-interval: 50 + decode-log-interval: 1000 + watchdog-timeout: 1000000 + context-length: 2176 + disable-shared-experts-fusion: true + eplb-algorithm: "deepseek" + disaggregation-bootstrap-port: 30001 + + # Prefill-specific mode + disaggregation-mode: "prefill" + disaggregation-transfer-backend: nixl + + # Memory and token limits + mem-fraction-static: 0.84 + max-total-tokens: 131072 + max-prefill-tokens: 32768 + chunked-prefill-size: 65536 + enable-single-batch-overlap: true + + # Request handling + max-running-requests: 30000 + load-balance-method: "round_robin" + + # Performance optimizations + disable-cuda-graph: true + enable-dp-attention: true + fp4-gemm-backend: "flashinfer_cutlass" + + # Parallelism + tp-size: 4 + dp-size: 4 + ep-size: 4 + + decode: + # Model configuration + served-model-name: "deepseek-ai/DeepSeek-R1" + trust-remote-code: true + + # KV cache and attention + kv-cache-dtype: "fp8_e4m3" + attention-backend: "trtllm_mla" + + # Quantization + quantization: "modelopt_fp4" + moe-runner-backend: "flashinfer_cutedsl" + 
+ # Radix cache disabled + disable-radix-cache: true + disable-chunked-prefix-cache: true + + # Other flags + stream-interval: 50 + decode-log-interval: 1000 + watchdog-timeout: 1000000 + context-length: 2176 + disable-shared-experts-fusion: true + eplb-algorithm: "deepseek" + disaggregation-bootstrap-port: 30001 + + # Decode-specific mode + disaggregation-mode: "decode" + + # Memory and token limits + mem-fraction-static: 0.83 + max-total-tokens: 3122380 + chunked-prefill-size: 786432 + + # Request handling + max-running-requests: 67584 + + # DeepEP configuration + moe-a2a-backend: "deepep" + deepep-mode: "low_latency" + ep-dispatch-algorithm: "static" + ep-num-redundant-experts: 32 + + # CUDA graphs (extensive batch size list) + cuda-graph-bs: [1, 2, 4, 8, 16, 24, 32, 40, 48, 56, 64, 72, 80, 88, 96, 104, 112, 120, 128, 136, 144, 152, 160, 168, 176, 184, 192, 200, 208, 216, 224, 232, 240, 248, 256, 264, 272, 280, 288, 296, 304, 312, 320, 328, 336, 344, 352, 360, 368, 376, 384, 416, 448, 480, 512, 544, 576, 608, 640, 672, 704, 736, 768, 1024] + num-reserved-decode-tokens: 112 + + # Additional decode optimizations + moe-dense-tp-size: 1 + enable-dp-lm-head: true + prefill-round-robin-balance: true + enable-dp-attention: true + fp4-gemm-backend: "flashinfer_cutlass" + disaggregation-transfer-backend: nixl + + # Parallelism + tp-size: 32 + dp-size: 32 + ep-size: 32 + +benchmark: + type: "sa-bench" + isl: 1024 + osl: 1024 + concurrencies: "512x2048x4096x8192x12000x15000" + req_rate: "inf" diff --git a/recipes/gb300-fp4/1k8k/low-latency.yaml b/recipes/gb300-fp4/1k8k/low-latency.yaml new file mode 100644 index 00000000..20290618 --- /dev/null +++ b/recipes/gb300-fp4/1k8k/low-latency.yaml @@ -0,0 +1,115 @@ +name: "gb300-fp4-1k8k-low-latency" + +dynamo: + version: 0.8.1 + +frontend: + type: dynamo + enable_multiple_frontends: true + num_additional_frontends: 4 + nginx_container: nginx + +model: + path: "dsfp4" + container: "lmsysorg/sglang:v0.5.8-cu130-runtime" + 
precision: "fp4" + +resources: + gpu_type: "gb200" + prefill_nodes: 1 + decode_nodes: 2 + prefill_workers: 1 + decode_workers: 2 + gpus_per_node: 4 + +backend: + prefill_environment: + TORCH_DISTRIBUTED_DEFAULT_TIMEOUT: "1800" + PYTHONUNBUFFERED: "1" + DYN_SKIP_SGLANG_LOG_FORMATTING: "1" + SGLANG_USE_MESSAGE_QUEUE_BROADCASTER: "0" + SGLANG_DISABLE_TP_MEMORY_INBALANCE_CHECK: "1" + SGLANG_DISAGGREGATION_HEARTBEAT_MAX_FAILURE: "100000" + SGLANG_DISAGGREGATION_BOOTSTRAP_TIMEOUT: "100000" + SGLANG_DISAGGREGATION_WAITING_TIMEOUT: "100000" + SGLANG_DECODE_BOOTSTRAP_TIMEOUT: "1000" + MC_FORCE_MNNVL: "1" + NCCL_MNNVL_ENABLE: "1" + NCCL_CUMEM_ENABLE: "1" + SGLANG_MOONCAKE_CUSTOM_MEM_POOL: "True" + SGLANG_ENABLE_JIT_DEEPGEMM: "false" + + decode_environment: + TORCH_DISTRIBUTED_DEFAULT_TIMEOUT: "1800" + PYTHONUNBUFFERED: "1" + DYN_SKIP_SGLANG_LOG_FORMATTING: "1" + SGLANG_USE_MESSAGE_QUEUE_BROADCASTER: "0" + SGLANG_DISABLE_TP_MEMORY_INBALANCE_CHECK: "1" + SGLANG_DISAGGREGATION_HEARTBEAT_MAX_FAILURE: "100000" + SGLANG_DISAGGREGATION_BOOTSTRAP_TIMEOUT: "100000" + SGLANG_DISAGGREGATION_WAITING_TIMEOUT: "100000" + SGLANG_DECODE_BOOTSTRAP_TIMEOUT: "1000" + MC_FORCE_MNNVL: "1" + NCCL_MNNVL_ENABLE: "1" + NCCL_CUMEM_ENABLE: "1" + SGLANG_MOONCAKE_CUSTOM_MEM_POOL: "True" + SGLANG_ENABLE_JIT_DEEPGEMM: "false" + + sglang_config: + prefill: + served-model-name: "deepseek-ai/DeepSeek-R1" + disaggregation-mode: "prefill" + trust-remote-code: true + disable-radix-cache: true + kv-cache-dtype: "fp8_e4m3" + attention-backend: "trtllm_mla" + quantization: "modelopt_fp4" + moe-runner-backend: "flashinfer_trtllm" + stream-interval: 10 + watchdog-timeout: 1000000 + context-length: 10000 + mem-fraction-static: 0.95 + max-total-tokens: 8192 + chunked-prefill-size: 8192 + disable-cuda-graph: true + max-running-requests: 512 + scheduler-recv-interval: 10 + moe-dense-tp-size: 1 + load-balance-method: "round_robin" + disaggregation-bootstrap-port: 30001 + data-parallel-size: 1 + tensor-parallel-size: 4 + 
expert-parallel-size: 1 + fp4-gemm-backend: "flashinfer_trtllm" + disaggregation-transfer-backend: nixl + + decode: + served-model-name: "deepseek-ai/DeepSeek-R1" + disaggregation-mode: "decode" + prefill-round-robin-balance: true + trust-remote-code: true + disable-radix-cache: true + kv-cache-dtype: "fp8_e4m3" + attention-backend: "trtllm_mla" + quantization: "modelopt_fp4" + moe-runner-backend: "flashinfer_trtllm" + disaggregation-bootstrap-port: 30001 + stream-interval: 10 + watchdog-timeout: 1000000 + context-length: 10000 + mem-fraction-static: 0.95 + chunked-prefill-size: 8192 + cuda-graph-max-bs: 256 + scheduler-recv-interval: 10 + moe-dense-tp-size: 1 + tensor-parallel-size: 4 + expert-parallel-size: 1 + fp4-gemm-backend: "flashinfer_trtllm" + disaggregation-transfer-backend: nixl + +benchmark: + type: "sa-bench" + isl: 1024 + osl: 8192 + concurrencies: "4x8x16x32" + req_rate: "inf" diff --git a/recipes/gb300-fp4/1k8k/max-tpt.yaml b/recipes/gb300-fp4/1k8k/max-tpt.yaml new file mode 100644 index 00000000..2af9ce6f --- /dev/null +++ b/recipes/gb300-fp4/1k8k/max-tpt.yaml @@ -0,0 +1,248 @@ +name: "gb300-fp4-1k8k-max-tpt" + +dynamo: + version: 0.8.1 + +frontend: + type: dynamo + enable_multiple_frontends: true + num_additional_frontends: 9 + nginx_container: nginx + +model: + path: "dsfp4" + container: "lmsysorg/sglang:v0.5.8-cu130-runtime" + precision: "fp4" + +resources: + gpu_type: "gb300" + prefill_nodes: 4 + decode_nodes: 12 + prefill_workers: 4 + decode_workers: 1 + gpus_per_node: 4 + +backend: + # Prefill-specific environment variables + prefill_environment: + TORCH_DISTRIBUTED_DEFAULT_TIMEOUT: "1800" + PYTHONUNBUFFERED: "1" + DYN_SKIP_SGLANG_LOG_FORMATTING: "1" + SGLANG_NVFP4_CKPT_FP8_GEMM_IN_ATTN: "1" + SGLANG_PER_TOKEN_GROUP_QUANT_8BIT_V2: "1" + SGLANG_DISAGGREGATION_HEARTBEAT_MAX_FAILURE: "100000" + SGLANG_DISAGGREGATION_BOOTSTRAP_TIMEOUT: "100000" + SGLANG_DISAGGREGATION_WAITING_TIMEOUT: "100000" + MC_TE_METRIC: "true" + MC_FORCE_MNNVL: "1" + 
NCCL_MNNVL_ENABLE: "1" + NCCL_CUMEM_ENABLE: "1" + SGLANG_MOONCAKE_CUSTOM_MEM_POOL: "True" + SGLANG_USE_MESSAGE_QUEUE_BROADCASTER: "0" + SGLANG_DISABLE_TP_MEMORY_INBALANCE_CHECK: "1" + + # Decode-specific environment variables + decode_environment: + TORCH_DISTRIBUTED_DEFAULT_TIMEOUT: "1800" + PYTHONUNBUFFERED: "1" + DYN_SKIP_SGLANG_LOG_FORMATTING: "1" + SGLANG_NVFP4_CKPT_FP8_GEMM_IN_ATTN: "1" + SGLANG_PER_TOKEN_GROUP_QUANT_8BIT_V2: "1" + SGLANG_DISAGGREGATION_HEARTBEAT_MAX_FAILURE: "100000" + SGLANG_DISAGGREGATION_BOOTSTRAP_TIMEOUT: "100000" + SGLANG_DISAGGREGATION_WAITING_TIMEOUT: "100000" + MC_TE_METRIC: "true" + MC_FORCE_MNNVL: "1" + NCCL_MNNVL_ENABLE: "1" + NCCL_CUMEM_ENABLE: "1" + SGLANG_MOONCAKE_CUSTOM_MEM_POOL: "True" + SGLANG_USE_MESSAGE_QUEUE_BROADCASTER: "0" + SGLANG_DISABLE_TP_MEMORY_INBALANCE_CHECK: "1" + SGLANG_DEEPEP_NUM_MAX_DISPATCH_TOKENS_PER_RANK: "1024" + SGLANG_MOE_NVFP4_DISPATCH: "1" + + sglang_config: + prefill: + # Model configuration + served-model-name: "deepseek-ai/DeepSeek-R1" + trust-remote-code: true + disaggregation-transfer-backend: nixl + + # KV cache and attention + kv-cache-dtype: "fp8_e4m3" + attention-backend: "trtllm_mla" + + # Quantization + quantization: "modelopt_fp4" + moe-runner-backend: "flashinfer_cutlass" + + # Radix cache disabled + disable-radix-cache: true + disable-chunked-prefix-cache: true + + # Other flags + stream-interval: 50 + decode-log-interval: 1000 + watchdog-timeout: 1000000 + context-length: 10000 + disable-shared-experts-fusion: true + eplb-algorithm: "deepseek" + disaggregation-bootstrap-port: 30001 + + # Prefill-specific mode + disaggregation-mode: "prefill" + + # Memory and token limits + mem-fraction-static: 0.84 + max-total-tokens: 131072 + max-prefill-tokens: 32768 + chunked-prefill-size: 65536 + enable-single-batch-overlap: true + + # Request handling + max-running-requests: 30000 + load-balance-method: "round_robin" + + # Performance optimizations + disable-cuda-graph: true + enable-dp-attention: 
true + fp4-gemm-backend: "flashinfer_cutlass" + + # Parallelism + tp-size: 4 + dp-size: 4 + ep-size: 4 + + decode: + # Model configuration + served-model-name: "deepseek-ai/DeepSeek-R1" + trust-remote-code: true + disaggregation-transfer-backend: nixl + + # KV cache and attention + kv-cache-dtype: "fp8_e4m3" + attention-backend: "trtllm_mla" + + # Quantization + quantization: "modelopt_fp4" + moe-runner-backend: "flashinfer_cutedsl" + + # Radix cache disabled + disable-radix-cache: true + disable-chunked-prefix-cache: true + + # Other flags + stream-interval: 50 + decode-log-interval: 1000 + watchdog-timeout: 1000000 + context-length: 10000 + disable-shared-experts-fusion: true + eplb-algorithm: "deepseek" + disaggregation-bootstrap-port: 30001 + + # Decode-specific mode + disaggregation-mode: "decode" + + # Memory and token limits + mem-fraction-static: 0.83 + max-total-tokens: 3122380 + chunked-prefill-size: 786432 + + # Request handling + max-running-requests: 67584 + enable-single-batch-overlap: true + + # DeepEP configuration + moe-a2a-backend: "deepep" + deepep-mode: "low_latency" + ep-dispatch-algorithm: "static" + ep-num-redundant-experts: 32 + + # CUDA graphs (extensive batch size list) + cuda-graph-bs: + [ + 1, + 2, + 4, + 8, + 16, + 24, + 32, + 40, + 48, + 56, + 64, + 72, + 80, + 88, + 96, + 104, + 112, + 120, + 128, + 136, + 144, + 152, + 160, + 168, + 176, + 184, + 192, + 200, + 208, + 216, + 224, + 232, + 240, + 248, + 256, + 264, + 272, + 280, + 288, + 296, + 304, + 312, + 320, + 328, + 336, + 344, + 352, + 360, + 368, + 376, + 384, + 416, + 448, + 480, + 512, + 544, + 576, + 608, + 640, + 672, + 704, + 736, + 768, + 1024, + ] + num-reserved-decode-tokens: 112 + + # Additional decode optimizations + moe-dense-tp-size: 1 + enable-dp-lm-head: true + prefill-round-robin-balance: true + enable-dp-attention: true + fp4-gemm-backend: "flashinfer_cutlass" + + # Parallelism + tp-size: 48 + dp-size: 48 + ep-size: 48 + +benchmark: + type: "sa-bench" + isl: 
1024 + osl: 8192 + concurrencies: "256x512x1024x2048" + req_rate: "inf" diff --git a/recipes/gb300-fp4/1k8k/mid-curve.yaml b/recipes/gb300-fp4/1k8k/mid-curve.yaml new file mode 100644 index 00000000..f4883606 --- /dev/null +++ b/recipes/gb300-fp4/1k8k/mid-curve.yaml @@ -0,0 +1,247 @@ +name: "gb300-fp4-1k8k-mid-curve" + +dynamo: + version: 0.8.1 + +frontend: + type: dynamo + enable_multiple_frontends: true + num_additional_frontends: 9 + nginx_container: nginx + +model: + path: "dsfp4" + container: "lmsysorg/sglang:v0.5.8-cu130-runtime" + precision: "fp4" + +resources: + gpu_type: "gb300" + prefill_nodes: 4 + decode_nodes: 8 + prefill_workers: 4 + decode_workers: 1 + gpus_per_node: 4 + +backend: + # Prefill-specific environment variables + prefill_environment: + TORCH_DISTRIBUTED_DEFAULT_TIMEOUT: "1800" + PYTHONUNBUFFERED: "1" + DYN_SKIP_SGLANG_LOG_FORMATTING: "1" + SGLANG_NVFP4_CKPT_FP8_GEMM_IN_ATTN: "1" + SGLANG_PER_TOKEN_GROUP_QUANT_8BIT_V2: "1" + SGLANG_DISAGGREGATION_HEARTBEAT_MAX_FAILURE: "100000" + SGLANG_DISAGGREGATION_BOOTSTRAP_TIMEOUT: "100000" + SGLANG_DISAGGREGATION_WAITING_TIMEOUT: "100000" + MC_TE_METRIC: "true" + MC_FORCE_MNNVL: "1" + NCCL_MNNVL_ENABLE: "1" + NCCL_CUMEM_ENABLE: "1" + SGLANG_MOONCAKE_CUSTOM_MEM_POOL: "True" + SGLANG_USE_MESSAGE_QUEUE_BROADCASTER: "0" + SGLANG_DISABLE_TP_MEMORY_INBALANCE_CHECK: "1" + + # Decode-specific environment variables + decode_environment: + TORCH_DISTRIBUTED_DEFAULT_TIMEOUT: "1800" + PYTHONUNBUFFERED: "1" + DYN_SKIP_SGLANG_LOG_FORMATTING: "1" + SGLANG_NVFP4_CKPT_FP8_GEMM_IN_ATTN: "1" + SGLANG_PER_TOKEN_GROUP_QUANT_8BIT_V2: "1" + SGLANG_DISAGGREGATION_HEARTBEAT_MAX_FAILURE: "100000" + SGLANG_DISAGGREGATION_BOOTSTRAP_TIMEOUT: "100000" + SGLANG_DISAGGREGATION_WAITING_TIMEOUT: "100000" + MC_TE_METRIC: "true" + MC_FORCE_MNNVL: "1" + NCCL_MNNVL_ENABLE: "1" + NCCL_CUMEM_ENABLE: "1" + SGLANG_MOONCAKE_CUSTOM_MEM_POOL: "True" + SGLANG_USE_MESSAGE_QUEUE_BROADCASTER: "0" + SGLANG_DISABLE_TP_MEMORY_INBALANCE_CHECK: "1" + 
SGLANG_DEEPEP_NUM_MAX_DISPATCH_TOKENS_PER_RANK: "1024" + SGLANG_MOE_NVFP4_DISPATCH: "1" + + sglang_config: + prefill: + # Model configuration + served-model-name: "deepseek-ai/DeepSeek-R1" + trust-remote-code: true + + # KV cache and attention + kv-cache-dtype: "fp8_e4m3" + attention-backend: "trtllm_mla" + disaggregation-transfer-backend: nixl + + # Quantization + quantization: "modelopt_fp4" + moe-runner-backend: "flashinfer_cutlass" + + # Radix cache disabled + disable-radix-cache: true + disable-chunked-prefix-cache: true + + # Other flags + stream-interval: 50 + decode-log-interval: 1000 + watchdog-timeout: 1000000 + context-length: 10000 + disable-shared-experts-fusion: true + eplb-algorithm: "deepseek" + disaggregation-bootstrap-port: 30001 + + # Prefill-specific mode + disaggregation-mode: "prefill" + + # Memory and token limits + mem-fraction-static: 0.84 + max-total-tokens: 131072 + max-prefill-tokens: 32768 + chunked-prefill-size: 65536 + enable-single-batch-overlap: true + + # Request handling + max-running-requests: 30000 + load-balance-method: "round_robin" + + # Performance optimizations + disable-cuda-graph: true + enable-dp-attention: true + fp4-gemm-backend: "flashinfer_cutlass" + + # Parallelism + tp-size: 4 + dp-size: 4 + ep-size: 4 + + decode: + # Model configuration + served-model-name: "deepseek-ai/DeepSeek-R1" + trust-remote-code: true + + # KV cache and attention + kv-cache-dtype: "fp8_e4m3" + attention-backend: "trtllm_mla" + disaggregation-transfer-backend: nixl + + # Quantization + quantization: "modelopt_fp4" + moe-runner-backend: "flashinfer_cutedsl" + + # Radix cache disabled + disable-radix-cache: true + disable-chunked-prefix-cache: true + + # Other flags + stream-interval: 50 + decode-log-interval: 1000 + watchdog-timeout: 1000000 + context-length: 10000 + disable-shared-experts-fusion: true + eplb-algorithm: "deepseek" + disaggregation-bootstrap-port: 30001 + + # Decode-specific mode + disaggregation-mode: "decode" + + # Memory 
and token limits + mem-fraction-static: 0.83 + max-total-tokens: 3122380 + chunked-prefill-size: 786432 + + # Request handling + max-running-requests: 67584 + + # DeepEP configuration + moe-a2a-backend: "deepep" + deepep-mode: "low_latency" + ep-dispatch-algorithm: "static" + ep-num-redundant-experts: 32 + + # CUDA graphs (extensive batch size list) + cuda-graph-bs: + [ + 1, + 2, + 4, + 8, + 16, + 24, + 32, + 40, + 48, + 56, + 64, + 72, + 80, + 88, + 96, + 104, + 112, + 120, + 128, + 136, + 144, + 152, + 160, + 168, + 176, + 184, + 192, + 200, + 208, + 216, + 224, + 232, + 240, + 248, + 256, + 264, + 272, + 280, + 288, + 296, + 304, + 312, + 320, + 328, + 336, + 344, + 352, + 360, + 368, + 376, + 384, + 416, + 448, + 480, + 512, + 544, + 576, + 608, + 640, + 672, + 704, + 736, + 768, + 1024, + ] + num-reserved-decode-tokens: 112 + + # Additional decode optimizations + moe-dense-tp-size: 1 + enable-dp-lm-head: true + prefill-round-robin-balance: true + enable-dp-attention: true + fp4-gemm-backend: "flashinfer_cutlass" # Only for 0.5.8 + + # Parallelism + tp-size: 32 + dp-size: 32 + ep-size: 32 + +benchmark: + type: "sa-bench" + isl: 1024 + osl: 8192 + concurrencies: "2048x4096x8192" + req_rate: "inf" diff --git a/recipes/gb300-fp4/8k1k/low_latency.yaml b/recipes/gb300-fp4/8k1k/low_latency.yaml new file mode 100644 index 00000000..f0c07e9d --- /dev/null +++ b/recipes/gb300-fp4/8k1k/low_latency.yaml @@ -0,0 +1,121 @@ +name: "gb300-fp4-8k1k-low-latency" + +dynamo: + version: 0.8.1 + +frontend: + type: dynamo + enable_multiple_frontends: true + num_additional_frontends: 3 + nginx_container: nginx + +model: + path: "dsfp4" + container: "lmsysorg/sglang:v0.5.8-cu130-runtime" + precision: "fp4" + +resources: + gpu_type: "gb300" + prefill_nodes: 1 + decode_nodes: 4 + prefill_workers: 1 + decode_workers: 4 + gpus_per_node: 4 + +backend: + + prefill_environment: + TORCH_DISTRIBUTED_DEFAULT_TIMEOUT: "1800" + PYTHONUNBUFFERED: "1" + DYN_SKIP_SGLANG_LOG_FORMATTING: "1" + 
SGLANG_USE_MESSAGE_QUEUE_BROADCASTER: "0" + SGLANG_DISABLE_TP_MEMORY_INBALANCE_CHECK: "1" + SGLANG_DISAGGREGATION_HEARTBEAT_MAX_FAILURE: "100000" + SGLANG_DISAGGREGATION_BOOTSTRAP_TIMEOUT: "100000" + SGLANG_DISAGGREGATION_WAITING_TIMEOUT: "100000" + SGLANG_DECODE_BOOTSTRAP_TIMEOUT: "1000" + MC_FORCE_MNNVL: "1" + NCCL_MNNVL_ENABLE: "1" + NCCL_CUMEM_ENABLE: "1" + SGLANG_MOONCAKE_CUSTOM_MEM_POOL: "True" + SGLANG_ENABLE_JIT_DEEPGEMM: "false" + + decode_environment: + TORCH_DISTRIBUTED_DEFAULT_TIMEOUT: "1800" + PYTHONUNBUFFERED: "1" + DYN_SKIP_SGLANG_LOG_FORMATTING: "1" + SGLANG_USE_MESSAGE_QUEUE_BROADCASTER: "0" + SGLANG_DISABLE_TP_MEMORY_INBALANCE_CHECK: "1" + SGLANG_DISAGGREGATION_HEARTBEAT_MAX_FAILURE: "100000" + SGLANG_DISAGGREGATION_BOOTSTRAP_TIMEOUT: "100000" + SGLANG_DISAGGREGATION_WAITING_TIMEOUT: "100000" + SGLANG_DECODE_BOOTSTRAP_TIMEOUT: "1000" + MC_FORCE_MNNVL: "1" + NCCL_MNNVL_ENABLE: "1" + NCCL_CUMEM_ENABLE: "1" + SGLANG_MOONCAKE_CUSTOM_MEM_POOL: "True" + SGLANG_ENABLE_JIT_DEEPGEMM: "false" + + sglang_config: + prefill: + disaggregation-mode: "prefill" + served-model-name: "deepseek-ai/DeepSeek-R1" + trust-remote-code: true + disable-radix-cache: true + kv-cache-dtype: "fp8_e4m3" + attention-backend: "trtllm_mla" + quantization: "modelopt_fp4" + moe-runner-backend: "flashinfer_trtllm" + stream-interval: 50 + watchdog-timeout: 1000000 + context-length: 9600 + mem-fraction-static: 0.95 + max-total-tokens: 32768 + chunked-prefill-size: 24576 + cuda-graph-max-bs: 256 + max-running-requests: 512 + scheduler-recv-interval: 10 + enable-symm-mem: true + moe-dense-tp-size: 1 + load-balance-method: "round_robin" + disaggregation-bootstrap-port: 30001 + data-parallel-size: 1 + tensor-parallel-size: 4 + expert-parallel-size: 1 + enable-dp-attention: false + fp4-gemm-backend: "flashinfer_trtllm" + disaggregation-transfer-backend: nixl + + + decode: + disaggregation-mode: "decode" + served-model-name: "deepseek-ai/DeepSeek-R1" + prefill-round-robin-balance: true + 
trust-remote-code: true + disable-radix-cache: true + kv-cache-dtype: "fp8_e4m3" + attention-backend: "trtllm_mla" + quantization: "modelopt_fp4" + moe-runner-backend: "flashinfer_trtllm" + disaggregation-bootstrap-port: 30001 + stream-interval: 50 + watchdog-timeout: 1000000 + context-length: 9600 + mem-fraction-static: 0.95 + chunked-prefill-size: 8192 + cuda-graph-max-bs: 128 + scheduler-recv-interval: 10 + enable-symm-mem: true + moe-dense-tp-size: 1 + tensor-parallel-size: 4 + expert-parallel-size: 1 + enable-dp-attention: false + fp4-gemm-backend: "flashinfer_trtllm" + disaggregation-transfer-backend: nixl + +benchmark: + type: "sa-bench" + isl: 8192 + osl: 1024 + concurrencies: "4x8x32x64" + req_rate: 300 diff --git a/recipes/gb300-fp4/8k1k/max_tpt.yaml b/recipes/gb300-fp4/8k1k/max_tpt.yaml new file mode 100644 index 00000000..7d57ab7b --- /dev/null +++ b/recipes/gb300-fp4/8k1k/max_tpt.yaml @@ -0,0 +1,179 @@ +name: "gb300-fp4-8k1k-max-tpt" + +dynamo: + version: 0.8.1 + +frontend: + type: dynamo + enable_multiple_frontends: true + num_additional_frontends: 9 + nginx_container: nginx + +model: + path: "dsfp4" + container: "lmsysorg/sglang:v0.5.8-cu130-runtime" + precision: "fp4" + +resources: + gpu_type: "gb300" + prefill_nodes: 10 + decode_nodes: 8 + prefill_workers: 10 + decode_workers: 1 + gpus_per_node: 4 + +backend: + + # Prefill-specific environment variables + prefill_environment: + TORCH_DISTRIBUTED_DEFAULT_TIMEOUT: "1800" + PYTHONUNBUFFERED: "1" + DYN_SKIP_SGLANG_LOG_FORMATTING: "1" + SGLANG_NVFP4_CKPT_FP8_GEMM_IN_ATTN: "1" + SGLANG_PER_TOKEN_GROUP_QUANT_8BIT_V2: "1" + SGLANG_DISAGGREGATION_HEARTBEAT_MAX_FAILURE: "100000" + SGLANG_DISAGGREGATION_BOOTSTRAP_TIMEOUT: "100000" + SGLANG_DISAGGREGATION_WAITING_TIMEOUT: "100000" + MC_TE_METRIC: "true" + MC_FORCE_MNNVL: "1" + NCCL_MNNVL_ENABLE: "1" + NCCL_CUMEM_ENABLE: "1" + SGLANG_MOONCAKE_CUSTOM_MEM_POOL: "True" + SGLANG_USE_MESSAGE_QUEUE_BROADCASTER: "0" + SGLANG_DISABLE_TP_MEMORY_INBALANCE_CHECK: "1" + + 
# Decode-specific environment variables + decode_environment: + TORCH_DISTRIBUTED_DEFAULT_TIMEOUT: "1800" + PYTHONUNBUFFERED: "1" + DYN_SKIP_SGLANG_LOG_FORMATTING: "1" + SGLANG_NVFP4_CKPT_FP8_GEMM_IN_ATTN: "1" + SGLANG_PER_TOKEN_GROUP_QUANT_8BIT_V2: "1" + SGLANG_DISAGGREGATION_HEARTBEAT_MAX_FAILURE: "100000" + SGLANG_DISAGGREGATION_BOOTSTRAP_TIMEOUT: "100000" + SGLANG_DISAGGREGATION_WAITING_TIMEOUT: "100000" + MC_TE_METRIC: "true" + MC_FORCE_MNNVL: "1" + NCCL_MNNVL_ENABLE: "1" + NCCL_CUMEM_ENABLE: "1" + SGLANG_MOONCAKE_CUSTOM_MEM_POOL: "True"  # NOTE(review): mixed-case "True" vs "true" used elsewhere — confirm consumer parses case-insensitively + SGLANG_USE_MESSAGE_QUEUE_BROADCASTER: "0" + SGLANG_DISABLE_TP_MEMORY_INBALANCE_CHECK: "1" + SGLANG_DEEPEP_NUM_MAX_DISPATCH_TOKENS_PER_RANK: "512" + SGLANG_MOE_NVFP4_DISPATCH: "1" + + sglang_config: + prefill: + # Model configuration + served-model-name: "deepseek-ai/DeepSeek-R1" + trust-remote-code: true + + # KV cache and attention + kv-cache-dtype: "fp8_e4m3" + attention-backend: "trtllm_mla" + + # Quantization + quantization: "modelopt_fp4" + moe-runner-backend: "flashinfer_trtllm" + + # Radix cache disabled + disable-radix-cache: true + disable-chunked-prefix-cache: true + + # Other flags + stream-interval: 50 + decode-log-interval: 1000 + watchdog-timeout: 1000000 + context-length: 9600  # covers isl 8192 + osl 1024 with margin + disable-shared-experts-fusion: true + disaggregation-bootstrap-port: 30001 + + # Prefill-specific mode + disaggregation-mode: "prefill" + + # Memory and token limits + mem-fraction-static: 0.95 + max-total-tokens: 131072 + max-prefill-tokens: 524288 + chunked-prefill-size: 131072 + + # Request handling + max-running-requests: 30000 + load-balance-method: "round_robin" + + # Performance optimizations + disable-cuda-graph: true + enable-dp-attention: false + fp4-gemm-backend: "flashinfer_cutlass" + disaggregation-transfer-backend: nixl + + # Parallelism + tp-size: 4  # 10 prefill workers x tp4 = prefill_nodes (10) x gpus_per_node (4) + dp-size: 1 + ep-size: 1 + + decode: + # Model configuration + served-model-name: "deepseek-ai/DeepSeek-R1" + trust-remote-code: true + + # KV cache and attention +
kv-cache-dtype: "fp8_e4m3" + attention-backend: "trtllm_mla" + + # Quantization + quantization: "modelopt_fp4" + moe-runner-backend: "flashinfer_cutedsl" + + # Radix cache disabled + disable-radix-cache: true + disable-chunked-prefix-cache: true + + # Other flags + stream-interval: 50 + decode-log-interval: 1000 + watchdog-timeout: 1000000 + context-length: 9600 + disable-shared-experts-fusion: true + eplb-algorithm: "deepseek" + disaggregation-bootstrap-port: 30001 + + # Decode-specific mode + disaggregation-mode: "decode" + + # Memory and token limits + mem-fraction-static: 0.83  # NOTE(review): below prefill's 0.95 — presumably headroom for DeepEP/CUDA-graph buffers; confirm + max-total-tokens: 524288 + chunked-prefill-size: 24576 + + # Request handling + max-running-requests: 16384 + + # DeepEP configuration + moe-a2a-backend: "deepep" + deepep-mode: "low_latency" + ep-dispatch-algorithm: "static" + ep-num-redundant-experts: 32 + + cuda-graph-max-bs: 512 + num-reserved-decode-tokens: 112 + + # Additional decode optimizations + moe-dense-tp-size: 1 + enable-dp-lm-head: true + prefill-round-robin-balance: true + enable-dp-attention: true + fp4-gemm-backend: "flashinfer_cutlass" + disaggregation-transfer-backend: nixl + + # Parallelism + tp-size: 32  # single decode worker spans decode_nodes (8) x gpus_per_node (4) + dp-size: 32 + ep-size: 32 + +benchmark: + type: "sa-bench" + isl: 8192 + osl: 1024 + concurrencies: "2048" + req_rate: 700 diff --git a/recipes/gb300-fp4/8k1k/mid_curve.yaml b/recipes/gb300-fp4/8k1k/mid_curve.yaml new file mode 100644 index 00000000..8bd3922f --- /dev/null +++ b/recipes/gb300-fp4/8k1k/mid_curve.yaml @@ -0,0 +1,179 @@ +name: "gb300-fp4-8k1k-mid-curve" + +dynamo: + version: 0.8.1 + +frontend: + type: dynamo + enable_multiple_frontends: true + num_additional_frontends: 9 + nginx_container: nginx + +model: + path: "dsfp4" + container: "lmsysorg/sglang:v0.5.8-cu130-runtime" + precision: "fp4" + +resources: + gpu_type: "gb300" + prefill_nodes: 6 + decode_nodes: 12 + prefill_workers: 6 + decode_workers: 1 + gpus_per_node: 4 + +backend: + + # Prefill-specific environment variables + prefill_environment: +
TORCH_DISTRIBUTED_DEFAULT_TIMEOUT: "1800" + PYTHONUNBUFFERED: "1" + DYN_SKIP_SGLANG_LOG_FORMATTING: "1" + SGLANG_NVFP4_CKPT_FP8_GEMM_IN_ATTN: "1" + SGLANG_PER_TOKEN_GROUP_QUANT_8BIT_V2: "1" + SGLANG_DISAGGREGATION_HEARTBEAT_MAX_FAILURE: "100000" + SGLANG_DISAGGREGATION_BOOTSTRAP_TIMEOUT: "100000" + SGLANG_DISAGGREGATION_WAITING_TIMEOUT: "100000" + MC_TE_METRIC: "true" + MC_FORCE_MNNVL: "1" + NCCL_MNNVL_ENABLE: "1" + NCCL_CUMEM_ENABLE: "1" + SGLANG_MOONCAKE_CUSTOM_MEM_POOL: "True" + SGLANG_USE_MESSAGE_QUEUE_BROADCASTER: "0" + SGLANG_DISABLE_TP_MEMORY_INBALANCE_CHECK: "1" + + # Decode-specific environment variables + decode_environment: + TORCH_DISTRIBUTED_DEFAULT_TIMEOUT: "1800" + PYTHONUNBUFFERED: "1" + DYN_SKIP_SGLANG_LOG_FORMATTING: "1" + SGLANG_NVFP4_CKPT_FP8_GEMM_IN_ATTN: "1" + SGLANG_PER_TOKEN_GROUP_QUANT_8BIT_V2: "1" + SGLANG_DISAGGREGATION_HEARTBEAT_MAX_FAILURE: "100000" + SGLANG_DISAGGREGATION_BOOTSTRAP_TIMEOUT: "100000" + SGLANG_DISAGGREGATION_WAITING_TIMEOUT: "100000" + MC_TE_METRIC: "true" + MC_FORCE_MNNVL: "1" + NCCL_MNNVL_ENABLE: "1" + NCCL_CUMEM_ENABLE: "1" + SGLANG_MOONCAKE_CUSTOM_MEM_POOL: "True" + SGLANG_USE_MESSAGE_QUEUE_BROADCASTER: "0" + SGLANG_DISABLE_TP_MEMORY_INBALANCE_CHECK: "1" + SGLANG_DEEPEP_NUM_MAX_DISPATCH_TOKENS_PER_RANK: "512" + SGLANG_MOE_NVFP4_DISPATCH: "1" + + sglang_config: + prefill: + # Model configuration + served-model-name: "deepseek-ai/DeepSeek-R1" + trust-remote-code: true + + # KV cache and attention + kv-cache-dtype: "fp8_e4m3" + attention-backend: "trtllm_mla" + + # Quantization + quantization: "modelopt_fp4" + moe-runner-backend: "flashinfer_trtllm" + + # Radix cache disabled + disable-radix-cache: true + disable-chunked-prefix-cache: true + + # Other flags + stream-interval: 50 + decode-log-interval: 1000 + watchdog-timeout: 1000000 + context-length: 9600 + disable-shared-experts-fusion: true + disaggregation-bootstrap-port: 30001 + + # Prefill-specific mode + disaggregation-mode: "prefill" + + # Memory and token
limits + mem-fraction-static: 0.95 + max-total-tokens: 131072 + max-prefill-tokens: 524288 + chunked-prefill-size: 131072 + + # Request handling + max-running-requests: 30000 + load-balance-method: "round_robin" + + # Performance optimizations + disable-cuda-graph: true + enable-dp-attention: false + fp4-gemm-backend: "flashinfer_cutlass" + disaggregation-transfer-backend: nixl + + # Parallelism + tp-size: 4  # 6 prefill workers x tp4 = prefill_nodes (6) x gpus_per_node (4) + dp-size: 1 + ep-size: 1 + + decode: + # Model configuration + served-model-name: "deepseek-ai/DeepSeek-R1" + trust-remote-code: true + + # KV cache and attention + kv-cache-dtype: "fp8_e4m3" + attention-backend: "trtllm_mla" + + # Quantization + quantization: "modelopt_fp4" + moe-runner-backend: "flashinfer_cutedsl" + + # Radix cache disabled + disable-radix-cache: true + disable-chunked-prefix-cache: true + + # Other flags + stream-interval: 50 + decode-log-interval: 1000 + watchdog-timeout: 1000000 + context-length: 9600 + disable-shared-experts-fusion: true + eplb-algorithm: "deepseek" + disaggregation-bootstrap-port: 30001 + + # Decode-specific mode + disaggregation-mode: "decode" + + # Memory and token limits + mem-fraction-static: 0.83 + max-total-tokens: 524288 + chunked-prefill-size: 24576 + + # Request handling + max-running-requests: 16384 + + # DeepEP configuration + moe-a2a-backend: "deepep" + deepep-mode: "low_latency" + ep-dispatch-algorithm: "static" + ep-num-redundant-experts: 32 + + cuda-graph-max-bs: 512 + num-reserved-decode-tokens: 112 + + # Additional decode optimizations + moe-dense-tp-size: 1 + enable-dp-lm-head: true + prefill-round-robin-balance: true + enable-dp-attention: true + fp4-gemm-backend: "flashinfer_cutlass" + disaggregation-transfer-backend: nixl + + # Parallelism + tp-size: 48  # single decode worker spans decode_nodes (12) x gpus_per_node (4) + dp-size: 48 + ep-size: 48 + +benchmark: + type: "sa-bench" + isl: 8192 + osl: 1024 + concurrencies: "512x2048x4096" + req_rate: 700