diff --git a/recipes/h200/1k1k/bs128-agg-tp-mtp.yaml b/recipes/h200/1k1k/bs128-agg-tp-mtp.yaml
new file mode 100644
index 00000000..45fa8871
--- /dev/null
+++ b/recipes/h200/1k1k/bs128-agg-tp-mtp.yaml
@@ -0,0 +1,66 @@
+name: "agg-tp-h200-fp8-mtp"
+
+model:
+  path: "dsfp8"
+  container: "lmsysorg/sglang:v0.5.8-cu130-runtime"  # SGLang image pinned to an explicit tag
+  precision: "fp8"
+
+resources:
+  gpu_type: "h200"
+  agg_nodes: 1
+  agg_workers: 1  # one aggregated worker on the single node
+  gpus_per_node: 8  # equals tp-size: 8 in sglang_config.aggregated
+
+backend:
+  # Environment for the aggregated worker. NOTE(review): the SGLANG_DISAGGREGATION_*
+  # entries mirror the prefill/decode recipes — confirm they are needed without a split.
+  aggregated_environment:
+    SGLANG_ENABLE_SPEC_V2: "1"
+    SGLANG_JIT_DEEPGEMM_FAST_WARMUP: "1"
+    SGLANG_DISAGGREGATION_HEARTBEAT_MAX_FAILURE: "100000"
+    SGLANG_DISAGGREGATION_BOOTSTRAP_TIMEOUT: "100000"
+    SGLANG_DISAGGREGATION_WAITING_TIMEOUT: "100000"
+
+  sglang_config:
+    aggregated:
+      # Model configuration
+      served-model-name: "deepseek-ai/DeepSeek-R1"
+      model-path: "/model/"
+      skip-tokenizer-init: true
+      trust-remote-code: true
+      watchdog-timeout: 1000000
+
+      # Parallelism (no data-parallel replication: dp-size is 1)
+      tp-size: 8
+      dp-size: 1
+
+      # KV cache and attention
+      attention-backend: "flashinfer"
+
+      # Radix cache disabled
+      disable-radix-cache: true
+
+      # Other flags
+      stream-interval: 10
+      max-running-requests: 128  # sum of all dp (dp-size is 1 here)
+
+      # Memory and token limits (prefill token budget equals the chunk size)
+      mem-fraction-static: 0.75
+      max-prefill-tokens: 32768
+      chunked-prefill-size: 32768
+
+      # CUDA graphs: max batch size matches max-running-requests
+      cuda-graph-max-bs: 128
+
+      # MTP settings
+      speculative-algorithm: "EAGLE"
+      speculative-num-steps: 2
+      speculative-eagle-topk: 1
+      speculative-num-draft-tokens: 3
+
+benchmark:
+  type: "sa-bench"
+  isl: 1024  # presumably input sequence length in tokens (the "1k" of the 1k1k directory) — confirm
+  osl: 1024  # presumably output sequence length in tokens — confirm
+  concurrencies: "1x4x16x32x64x128x256x512"  # looks like an "x"-separated sweep of concurrency levels — confirm
+  req_rate: "inf"
diff --git a/recipes/h200/1k1k/bs256-1p6d-dep-mtp.yaml b/recipes/h200/1k1k/bs256-1p6d-dep-mtp.yaml
new file mode 100644
index 00000000..7c85acbc
--- /dev/null
+++ b/recipes/h200/1k1k/bs256-1p6d-dep-mtp.yaml
@@ -0,0 +1,115 @@
+name: "bs256-1p6d-dep-h200-fp8-mtp"  # "dep" tag keeps this distinct from the TP-only 1p6d recipe's name
+
+model:
+  path: "dsfp8"
+  container: "lmsysorg/sglang:v0.5.8-cu130-runtime"  # SGLang image pinned to an explicit tag
+  precision: "fp8"
+
+resources:
+  gpu_type: "h200"
+  prefill_nodes: 1
+  prefill_workers: 1  # one prefill worker per prefill node
+  decode_nodes: 6
+  decode_workers: 6  # one decode worker per decode node
+  gpus_per_node: 8  # equals tp-size: 8 used by both prefill and decode
+
+backend:
+
+  # Prefill-specific environment variables
+  prefill_environment:
+    SGLANG_ENABLE_SPEC_V2: "1"
+    SGLANG_JIT_DEEPGEMM_FAST_WARMUP: "1"
+    SGLANG_DISAGGREGATION_HEARTBEAT_MAX_FAILURE: "100000"
+    SGLANG_DISAGGREGATION_BOOTSTRAP_TIMEOUT: "100000"
+    SGLANG_DISAGGREGATION_WAITING_TIMEOUT: "100000"
+
+  # Decode-specific environment variables (same values as the prefill block)
+  decode_environment:
+    SGLANG_ENABLE_SPEC_V2: "1"
+    SGLANG_JIT_DEEPGEMM_FAST_WARMUP: "1"
+    SGLANG_DISAGGREGATION_HEARTBEAT_MAX_FAILURE: "100000"
+    SGLANG_DISAGGREGATION_BOOTSTRAP_TIMEOUT: "100000"
+    SGLANG_DISAGGREGATION_WAITING_TIMEOUT: "100000"
+
+  sglang_config:
+    prefill:
+      # Model configuration
+      served-model-name: "deepseek-ai/DeepSeek-R1"
+      model-path: "/model/"
+      skip-tokenizer-init: true
+      trust-remote-code: true
+      watchdog-timeout: 1000000
+
+      # Parallelism (DP attention enabled; tp-size, dp-size and ep-size are all 8)
+      tp-size: 8
+      dp-size: 8
+      ep-size: 8
+      enable-dp-attention: true
+
+      # KV cache and attention
+      attention-backend: "flashinfer"
+
+      # Radix cache disabled
+      disable-radix-cache: true
+
+      # Other flags
+      # stream-interval: 50
+      max-running-requests: 512
+
+      # Disaggregation (prefill side)
+      disaggregation-bootstrap-port: 30001
+      disaggregation-mode: "prefill"
+      disaggregation-transfer-backend: "nixl"
+
+      # Memory and token limits
+      mem-fraction-static: 0.75
+      max-prefill-tokens: 65536
+      chunked-prefill-size: 262144  # NOTE(review): 4x max-prefill-tokens; 262144 / dp-size 8 = 32768 — confirm intended
+
+      # Request handling
+      load-balance-method: "round_robin"
+
+
+    decode:
+      # Model configuration
+      served-model-name: "deepseek-ai/DeepSeek-R1"
+      model-path: "/model/"
+      skip-tokenizer-init: true
+      trust-remote-code: true
+      watchdog-timeout: 1000000
+
+      # Parallelism (DP attention enabled; tp-size, dp-size and ep-size are all 8)
+      tp-size: 8
+      dp-size: 8
+      ep-size: 8
+      enable-dp-attention: true
+
+      # KV cache and attention
+      attention-backend: "flashinfer"
+
+      # Other flags
+      disable-radix-cache: true
+      stream-interval: 10
+
+      # Disaggregation (decode side); port and transfer backend match the prefill section
+      disaggregation-bootstrap-port: 30001
+      disaggregation-mode: "decode"
+      disaggregation-transfer-backend: "nixl"
+
+      # Memory and batch limits (CUDA graph batch size matches max-running-requests)
+      mem-fraction-static: 0.75
+      max-running-requests: 128
+      cuda-graph-max-bs: 128
+
+      # MTP settings; NOTE(review): prefill sets no speculative-* flags — confirm intended
+      speculative-algorithm: "EAGLE"
+      speculative-num-steps: 2
+      speculative-eagle-topk: 1
+      speculative-num-draft-tokens: 3
+
+benchmark:
+  type: "sa-bench"
+  isl: 1024  # presumably input sequence length in tokens (the "1k" of the 1k1k directory) — confirm
+  osl: 1024  # presumably output sequence length in tokens — confirm
+  concurrencies: "128x256x512x1024x2048"  # looks like an "x"-separated sweep of concurrency levels — confirm
+  req_rate: "inf"
diff --git a/recipes/h200/1k1k/bs256-1p6d-tp-mtp.yaml b/recipes/h200/1k1k/bs256-1p6d-tp-mtp.yaml
new file mode 100644
index 00000000..2d1d3626
--- /dev/null
+++ b/recipes/h200/1k1k/bs256-1p6d-tp-mtp.yaml
@@ -0,0 +1,115 @@
+name: "bs256-1p6d-h200-fp8-mtp"
+
+model:
+  path: "dsfp8"
+  container: "lmsysorg/sglang:v0.5.8-cu130-runtime"  # SGLang image pinned to an explicit tag
+  precision: "fp8"
+
+resources:
+  gpu_type: "h200"
+  prefill_nodes: 1
+  prefill_workers: 1  # one prefill worker per prefill node
+  decode_nodes: 6
+  decode_workers: 6  # one decode worker per decode node
+  gpus_per_node: 8  # equals tp-size: 8 used by both prefill and decode
+
+backend:
+
+  # Prefill-specific environment variables
+  prefill_environment:
+    SGLANG_ENABLE_SPEC_V2: "1"
+    SGLANG_JIT_DEEPGEMM_FAST_WARMUP: "1"
+    SGLANG_DISAGGREGATION_HEARTBEAT_MAX_FAILURE: "100000"
+    SGLANG_DISAGGREGATION_BOOTSTRAP_TIMEOUT: "100000"
+    SGLANG_DISAGGREGATION_WAITING_TIMEOUT: "100000"
+
+  # Decode-specific environment variables (same values as the prefill block)
+  decode_environment:
+    SGLANG_ENABLE_SPEC_V2: "1"
+    SGLANG_JIT_DEEPGEMM_FAST_WARMUP: "1"
+    SGLANG_DISAGGREGATION_HEARTBEAT_MAX_FAILURE: "100000"
+    SGLANG_DISAGGREGATION_BOOTSTRAP_TIMEOUT: "100000"
+    SGLANG_DISAGGREGATION_WAITING_TIMEOUT: "100000"
+
+  sglang_config:
+    prefill:
+      # Model configuration
+      served-model-name: "deepseek-ai/DeepSeek-R1"
+      model-path: "/model/"
+      skip-tokenizer-init: true
+      trust-remote-code: true
+      watchdog-timeout: 1000000
+
+      # Parallelism (tensor parallel only: dp-size and ep-size are 1)
+      tp-size: 8
+      dp-size: 1
+      ep-size: 1
+
+      # KV cache and attention
+      attention-backend: "flashinfer"
+
+      # Radix cache disabled
+      disable-radix-cache: true
+
+      # Other flags
+      # stream-interval: 50
+      max-running-requests: 512
+
+
+      # Disaggregation (prefill side)
+      disaggregation-bootstrap-port: 30001
+      disaggregation-mode: "prefill"
+      disaggregation-transfer-backend: "nixl"
+
+      # Memory and token limits (prefill token budget equals the chunk size)
+      mem-fraction-static: 0.7
+      max-prefill-tokens: 163840
+      chunked-prefill-size: 163840
+
+      # Request handling
+      load-balance-method: "round_robin"
+
+
+    decode:
+      # Model configuration
+      served-model-name: "deepseek-ai/DeepSeek-R1"
+      model-path: "/model/"
+      skip-tokenizer-init: true
+      trust-remote-code: true
+      watchdog-timeout: 1000000
+
+      # Parallelism (tensor parallel only: dp-size and ep-size are 1)
+      tp-size: 8
+      dp-size: 1
+      ep-size: 1
+
+      # KV cache and attention
+      attention-backend: "flashinfer"
+
+      # Other flags
+      disable-radix-cache: true
+      stream-interval: 10
+
+      # Disaggregation (decode side); port and transfer backend match the prefill section
+      disaggregation-bootstrap-port: 30001
+      disaggregation-mode: "decode"
+      disaggregation-transfer-backend: "nixl"
+
+      # Memory and batch limits (CUDA graph batch size matches max-running-requests)
+      mem-fraction-static: 0.75
+      max-running-requests: 128
+      cuda-graph-max-bs: 128
+
+      # MTP settings; NOTE(review): prefill sets no speculative-* flags — confirm intended
+      speculative-algorithm: "EAGLE"
+      speculative-num-steps: 2
+      speculative-eagle-topk: 1
+      speculative-num-draft-tokens: 3
+
+benchmark:
+  type: "sa-bench"
+  isl: 1024  # presumably input sequence length in tokens (the "1k" of the 1k1k directory) — confirm
+  osl: 1024  # presumably output sequence length in tokens — confirm
+  # concurrencies: "128x256x512"
+  concurrencies: "512x1024x2048"  # active sweep; the commented-out line above holds a lower range
+  req_rate: "inf"
diff --git a/recipes/h200/1k1k/low-latency-1p9d-mtp.yaml b/recipes/h200/1k1k/low-latency-1p9d-mtp.yaml
new file mode 100644
index 00000000..2f5938bc
--- /dev/null
+++ b/recipes/h200/1k1k/low-latency-1p9d-mtp.yaml
@@ -0,0 +1,113 @@
+name: "low-latency-1p9d-h200-fp8-mtp"
+
+model:
+  path: "dsfp8"
+  container: "lmsysorg/sglang:v0.5.8-cu130-runtime"  # SGLang image pinned to an explicit tag
+  precision: "fp8"
+
+resources:
+  gpu_type: "h200"
+  prefill_nodes: 1
+  prefill_workers: 1  # one prefill worker per prefill node
+  decode_nodes: 9
+  decode_workers: 9  # one decode worker per decode node
+  gpus_per_node: 8  # equals tp-size: 8 used by both prefill and decode
+
+backend:
+
+  # Prefill-specific environment variables
+  prefill_environment:
+    SGLANG_ENABLE_SPEC_V2: "1"
+    SGLANG_JIT_DEEPGEMM_FAST_WARMUP: "1"
+    SGLANG_DISAGGREGATION_HEARTBEAT_MAX_FAILURE: "100000"
+    SGLANG_DISAGGREGATION_BOOTSTRAP_TIMEOUT: "100000"
+    SGLANG_DISAGGREGATION_WAITING_TIMEOUT: "100000"
+
+  # Decode-specific environment variables (same values as the prefill block)
+  decode_environment:
+    SGLANG_ENABLE_SPEC_V2: "1"
+    SGLANG_JIT_DEEPGEMM_FAST_WARMUP: "1"
+    SGLANG_DISAGGREGATION_HEARTBEAT_MAX_FAILURE: "100000"
+    SGLANG_DISAGGREGATION_BOOTSTRAP_TIMEOUT: "100000"
+    SGLANG_DISAGGREGATION_WAITING_TIMEOUT: "100000"
+
+  sglang_config:
+    prefill:
+      # Model configuration
+      served-model-name: "deepseek-ai/DeepSeek-R1"
+      model-path: "/model/"
+      skip-tokenizer-init: true
+      trust-remote-code: true
+      watchdog-timeout: 1000000
+
+      # Parallelism (tensor parallel only: dp-size and ep-size are 1)
+      tp-size: 8
+      dp-size: 1
+      ep-size: 1
+
+      # KV cache and attention
+      attention-backend: "flashinfer"
+
+      # Radix cache disabled
+      disable-radix-cache: true
+
+      # Other flags
+      # stream-interval: 50
+      max-running-requests: 256
+
+
+      # Disaggregation (prefill side)
+      disaggregation-bootstrap-port: 30001
+      disaggregation-mode: "prefill"
+      disaggregation-transfer-backend: "nixl"
+
+      # Memory and token limits (prefill token budget equals the chunk size)
+      mem-fraction-static: 0.82
+      max-prefill-tokens: 163840
+      chunked-prefill-size: 163840
+
+      # Request handling
+      load-balance-method: "round_robin"
+
+    decode:
+      # Model configuration
+      served-model-name: "deepseek-ai/DeepSeek-R1"
+      model-path: "/model/"
+      skip-tokenizer-init: true
+      trust-remote-code: true
+      watchdog-timeout: 1000000
+
+      # Parallelism (tensor parallel only: dp-size and ep-size are 1)
+      tp-size: 8
+      dp-size: 1
+      ep-size: 1
+
+      # KV cache and attention
+      attention-backend: "flashinfer"
+
+      # Other flags
+      disable-radix-cache: true
+      stream-interval: 10
+
+      # Disaggregation (decode side); port and transfer backend match the prefill section
+      disaggregation-bootstrap-port: 30001
+      disaggregation-mode: "decode"
+      disaggregation-transfer-backend: "nixl"
+
+      # Memory and batch limits (CUDA graph batch size matches max-running-requests)
+      mem-fraction-static: 0.75
+      max-running-requests: 64
+      cuda-graph-max-bs: 64
+
+      # MTP settings; NOTE(review): prefill sets no speculative-* flags — confirm intended
+      speculative-algorithm: "EAGLE"
+      speculative-num-steps: 2
+      speculative-eagle-topk: 1
+      speculative-num-draft-tokens: 3
+
+benchmark:
+  type: "sa-bench"
+  isl: 1024  # presumably input sequence length in tokens (the "1k" of the 1k1k directory) — confirm
+  osl: 1024  # presumably output sequence length in tokens — confirm
+  concurrencies: "1x4x8x16x32x64x128x256"  # looks like an "x"-separated sweep of concurrency levels — confirm
+  req_rate: "inf"
diff --git a/recipes/h200/8k1k/bs128-1p1d-dep-mtp.yaml b/recipes/h200/8k1k/bs128-1p1d-dep-mtp.yaml
new file mode 100644
index 00000000..323b10e1
--- /dev/null
+++ b/recipes/h200/8k1k/bs128-1p1d-dep-mtp.yaml
@@ -0,0 +1,115 @@
+name: "bs128-1p1d-dep-h200-fp8-mtp"
+
+model:
+  path: "dsfp8"
+  container: "lmsysorg/sglang:v0.5.8-cu130-runtime"  # SGLang image pinned to an explicit tag
+  precision: "fp8"
+
+resources:
+  gpu_type: "h200"
+  prefill_nodes: 1
+  prefill_workers: 1  # one prefill worker per prefill node
+  decode_nodes: 1
+  decode_workers: 1  # one decode worker per decode node
+  gpus_per_node: 8  # equals tp-size: 8 used by both prefill and decode
+
+backend:
+
+  # Prefill-specific environment variables
+  prefill_environment:
+    SGLANG_ENABLE_SPEC_V2: "1"
+    SGLANG_JIT_DEEPGEMM_FAST_WARMUP: "1"
+    SGLANG_DISAGGREGATION_HEARTBEAT_MAX_FAILURE: "100000"
+    SGLANG_DISAGGREGATION_BOOTSTRAP_TIMEOUT: "100000"
+    SGLANG_DISAGGREGATION_WAITING_TIMEOUT: "100000"
+
+  # Decode-specific environment variables (same values as the prefill block)
+  decode_environment:
+    SGLANG_ENABLE_SPEC_V2: "1"
+    SGLANG_JIT_DEEPGEMM_FAST_WARMUP: "1"
+    SGLANG_DISAGGREGATION_HEARTBEAT_MAX_FAILURE: "100000"
+    SGLANG_DISAGGREGATION_BOOTSTRAP_TIMEOUT: "100000"
+    SGLANG_DISAGGREGATION_WAITING_TIMEOUT: "100000"
+
+  sglang_config:
+    prefill:
+      # Model configuration
+      served-model-name: "deepseek-ai/DeepSeek-R1"
+      model-path: "/model/"
+      skip-tokenizer-init: true
+      trust-remote-code: true
+      watchdog-timeout: 1000000
+
+      # Parallelism (tensor parallel only: dp-size and ep-size are 1)
+      tp-size: 8
+      dp-size: 1
+      ep-size: 1
+
+      # KV cache and attention
+      attention-backend: "flashinfer"
+
+      # Radix cache disabled
+      disable-radix-cache: true
+
+      # Other flags
+      # stream-interval: 50
+      max-running-requests: 16
+
+
+      # Disaggregation (prefill side)
+      disaggregation-bootstrap-port: 30001
+      disaggregation-mode: "prefill"
+      disaggregation-transfer-backend: "nixl"
+
+      # Memory and token limits (prefill token budget equals the chunk size)
+      mem-fraction-static: 0.75
+      max-prefill-tokens: 163840
+      chunked-prefill-size: 163840
+
+      # Request handling
+      load-balance-method: "round_robin"
+
+
+    decode:
+      # Model configuration
+      served-model-name: "deepseek-ai/DeepSeek-R1"
+      model-path: "/model/"
+      skip-tokenizer-init: true
+      trust-remote-code: true
+      watchdog-timeout: 1000000
+
+      # Parallelism (DP attention enabled; tp-size, dp-size and ep-size are all 8)
+      tp-size: 8
+      dp-size: 8
+      ep-size: 8
+      enable-dp-attention: true
+
+      # KV cache and attention
+      attention-backend: "flashinfer"
+
+      # Other flags
+      disable-radix-cache: true
+      stream-interval: 10
+
+      # Disaggregation (decode side); port and transfer backend match the prefill section
+      disaggregation-bootstrap-port: 30001
+      disaggregation-mode: "decode"
+      disaggregation-transfer-backend: "nixl"
+
+      # Memory and batch limits (CUDA graph batch size matches max-running-requests)
+      mem-fraction-static: 0.85
+      max-running-requests: 192
+      cuda-graph-max-bs: 192
+
+      # MTP settings; NOTE(review): prefill sets no speculative-* flags — confirm intended
+      speculative-algorithm: "EAGLE"
+      speculative-num-steps: 2
+      speculative-eagle-topk: 1
+      speculative-num-draft-tokens: 3
+
+benchmark:
+  type: "sa-bench"
+  isl: 8192  # presumably input sequence length in tokens (the "8k" of the 8k1k directory) — confirm
+  osl: 1024  # presumably output sequence length in tokens — confirm
+  concurrencies: "32x64x128x256x512"  # looks like an "x"-separated sweep of concurrency levels — confirm
+  req_rate: "inf"
diff --git a/recipes/h200/8k1k/bs128-agg-tp-mtp.yaml b/recipes/h200/8k1k/bs128-agg-tp-mtp.yaml
new file mode 100644
index 00000000..8e2e8fe9
--- /dev/null
+++ b/recipes/h200/8k1k/bs128-agg-tp-mtp.yaml
@@ -0,0 +1,66 @@
+name: "agg-tp-h200-fp8-mtp"  # NOTE(review): same name as recipes/h200/1k1k/bs128-agg-tp-mtp.yaml — confirm names need not be unique
+
+model:
+  path: "dsfp8"
+  container: "lmsysorg/sglang:v0.5.8-cu130-runtime"  # SGLang image pinned to an explicit tag
+  precision: "fp8"
+
+resources:
+  gpu_type: "h200"
+  agg_nodes: 1
+  agg_workers: 1  # one aggregated worker on the single node
+  gpus_per_node: 8  # equals tp-size: 8 in sglang_config.aggregated
+
+backend:
+  # Environment for the aggregated worker. NOTE(review): the SGLANG_DISAGGREGATION_*
+  # entries mirror the prefill/decode recipes — confirm they are needed without a split.
+  aggregated_environment:
+    SGLANG_ENABLE_SPEC_V2: "1"
+    SGLANG_JIT_DEEPGEMM_FAST_WARMUP: "1"
+    SGLANG_DISAGGREGATION_HEARTBEAT_MAX_FAILURE: "100000"
+    SGLANG_DISAGGREGATION_BOOTSTRAP_TIMEOUT: "100000"
+    SGLANG_DISAGGREGATION_WAITING_TIMEOUT: "100000"
+
+  sglang_config:
+    aggregated:
+      # Model configuration
+      served-model-name: "deepseek-ai/DeepSeek-R1"
+      model-path: "/model/"
+      skip-tokenizer-init: true
+      trust-remote-code: true
+      watchdog-timeout: 1000000
+
+      # Parallelism (no data-parallel replication: dp-size is 1)
+      tp-size: 8
+      dp-size: 1
+
+      # KV cache and attention
+      attention-backend: "flashinfer"
+
+      # Radix cache disabled
+      disable-radix-cache: true
+
+      # Other flags
+      stream-interval: 10
+      max-running-requests: 32  # sum of all dp (dp-size is 1 here)
+
+      # Memory and token limits (prefill token budget equals the chunk size)
+      mem-fraction-static: 0.75
+      max-prefill-tokens: 32768
+      chunked-prefill-size: 32768
+
+      # CUDA graphs: max batch size matches max-running-requests
+      cuda-graph-max-bs: 32
+
+      # MTP settings
+      speculative-algorithm: "EAGLE"
+      speculative-num-steps: 2
+      speculative-eagle-topk: 1
+      speculative-num-draft-tokens: 3
+
+benchmark:
+  type: "sa-bench"
+  isl: 8192  # presumably input sequence length in tokens (the "8k" of the 8k1k directory) — confirm
+  osl: 1024  # presumably output sequence length in tokens — confirm
+  concurrencies: "1x4x16x32x64x128x256"  # looks like an "x"-separated sweep of concurrency levels — confirm
+  req_rate: "inf"
diff --git a/recipes/h200/8k1k/bs16-1p3d-mtp.yaml b/recipes/h200/8k1k/bs16-1p3d-mtp.yaml
new file mode 100644
index 00000000..97bba3be
--- /dev/null
+++ b/recipes/h200/8k1k/bs16-1p3d-mtp.yaml
@@ -0,0 +1,113 @@
+name: "bs16-1p3d-h200-fp8-mtp"
+
+model:
+  path: "dsfp8"
+  container: "lmsysorg/sglang:v0.5.8-cu130-runtime"  # SGLang image pinned to an explicit tag
+  precision: "fp8"
+
+resources:
+  gpu_type: "h200"
+  prefill_nodes: 1
+  prefill_workers: 1  # one prefill worker per prefill node
+  decode_nodes: 3
+  decode_workers: 3  # one decode worker per decode node
+  gpus_per_node: 8  # equals tp-size: 8 used by both prefill and decode
+
+backend:
+
+  # Prefill-specific environment variables
+  prefill_environment:
+    SGLANG_ENABLE_SPEC_V2: "1"
+    SGLANG_JIT_DEEPGEMM_FAST_WARMUP: "1"
+    SGLANG_DISAGGREGATION_HEARTBEAT_MAX_FAILURE: "100000"
+    SGLANG_DISAGGREGATION_BOOTSTRAP_TIMEOUT: "100000"
+    SGLANG_DISAGGREGATION_WAITING_TIMEOUT: "100000"
+
+  # Decode-specific environment variables (same values as the prefill block)
+  decode_environment:
+    SGLANG_ENABLE_SPEC_V2: "1"
+    SGLANG_JIT_DEEPGEMM_FAST_WARMUP: "1"
+    SGLANG_DISAGGREGATION_HEARTBEAT_MAX_FAILURE: "100000"
+    SGLANG_DISAGGREGATION_BOOTSTRAP_TIMEOUT: "100000"
+    SGLANG_DISAGGREGATION_WAITING_TIMEOUT: "100000"
+
+  sglang_config:
+    prefill:
+      # Model configuration
+      served-model-name: "deepseek-ai/DeepSeek-R1"
+      model-path: "/model/"
+      skip-tokenizer-init: true
+      trust-remote-code: true
+      watchdog-timeout: 1000000
+
+      # Parallelism (tensor parallel only: dp-size and ep-size are 1)
+      tp-size: 8
+      dp-size: 1
+      ep-size: 1
+
+      # KV cache and attention
+      attention-backend: "flashinfer"
+
+      # Radix cache disabled
+      disable-radix-cache: true
+
+      # Other flags
+      # stream-interval: 50
+      max-running-requests: 16
+
+
+      # Disaggregation (prefill side)
+      disaggregation-bootstrap-port: 30001
+      disaggregation-mode: "prefill"
+      disaggregation-transfer-backend: "nixl"
+
+      # Memory and token limits (prefill token budget equals the chunk size)
+      mem-fraction-static: 0.82
+      max-prefill-tokens: 32768
+      chunked-prefill-size: 32768
+
+      # Request handling
+      load-balance-method: "round_robin"
+
+    decode:
+      # Model configuration
+      served-model-name: "deepseek-ai/DeepSeek-R1"
+      model-path: "/model/"
+      skip-tokenizer-init: true
+      trust-remote-code: true
+      watchdog-timeout: 1000000
+
+      # Parallelism (tensor parallel only: dp-size and ep-size are 1)
+      tp-size: 8
+      dp-size: 1
+      ep-size: 1
+
+      # KV cache and attention
+      attention-backend: "flashinfer"
+
+      # Other flags
+      disable-radix-cache: true
+      stream-interval: 10
+
+      # Disaggregation (decode side); port and transfer backend match the prefill section
+      disaggregation-bootstrap-port: 30001
+      disaggregation-mode: "decode"
+      disaggregation-transfer-backend: "nixl"
+
+      # Memory and batch limits (CUDA graph batch size matches max-running-requests)
+      mem-fraction-static: 0.82
+      max-running-requests: 32
+      cuda-graph-max-bs: 32
+
+      # MTP settings; NOTE(review): prefill sets no speculative-* flags — confirm intended
+      speculative-algorithm: "EAGLE"
+      speculative-num-steps: 2
+      speculative-eagle-topk: 1
+      speculative-num-draft-tokens: 3
+
+benchmark:
+  type: "sa-bench"
+  isl: 8192  # presumably input sequence length in tokens (the "8k" of the 8k1k directory) — confirm
+  osl: 1024  # presumably output sequence length in tokens — confirm
+  concurrencies: "4x8x16x32x64"  # looks like an "x"-separated sweep of concurrency levels — confirm
+  req_rate: "inf"
diff --git a/recipes/h200/8k1k/bs4-1p7d-mtp.yaml b/recipes/h200/8k1k/bs4-1p7d-mtp.yaml
new file mode 100644
index 00000000..381bfaa9
--- /dev/null
+++ b/recipes/h200/8k1k/bs4-1p7d-mtp.yaml
@@ -0,0 +1,113 @@
+name: "bs4-1p7d-h200-fp8-mtp"
+
+model:
+  path: "dsfp8"
+  container: "lmsysorg/sglang:v0.5.8-cu130-runtime"  # SGLang image pinned to an explicit tag
+  precision: "fp8"
+
+resources:
+  gpu_type: "h200"
+  prefill_nodes: 1
+  prefill_workers: 1  # one prefill worker per prefill node
+  decode_nodes: 7
+  decode_workers: 7  # one decode worker per decode node
+  gpus_per_node: 8  # equals tp-size: 8 used by both prefill and decode
+
+backend:
+
+  # Prefill-specific environment variables
+  prefill_environment:
+    SGLANG_ENABLE_SPEC_V2: "1"
+    SGLANG_JIT_DEEPGEMM_FAST_WARMUP: "1"
+    SGLANG_DISAGGREGATION_HEARTBEAT_MAX_FAILURE: "100000"
+    SGLANG_DISAGGREGATION_BOOTSTRAP_TIMEOUT: "100000"
+    SGLANG_DISAGGREGATION_WAITING_TIMEOUT: "100000"
+
+  # Decode-specific environment variables (same values as the prefill block)
+  decode_environment:
+    SGLANG_ENABLE_SPEC_V2: "1"
+    SGLANG_JIT_DEEPGEMM_FAST_WARMUP: "1"
+    SGLANG_DISAGGREGATION_HEARTBEAT_MAX_FAILURE: "100000"
+    SGLANG_DISAGGREGATION_BOOTSTRAP_TIMEOUT: "100000"
+    SGLANG_DISAGGREGATION_WAITING_TIMEOUT: "100000"
+
+  sglang_config:
+    prefill:
+      # Model configuration
+      served-model-name: "deepseek-ai/DeepSeek-R1"
+      model-path: "/model/"
+      skip-tokenizer-init: true
+      trust-remote-code: true
+      watchdog-timeout: 1000000
+
+      # Parallelism (tensor parallel only: dp-size and ep-size are 1)
+      tp-size: 8
+      dp-size: 1
+      ep-size: 1
+
+      # KV cache and attention
+      attention-backend: "flashinfer"
+
+      # Radix cache disabled
+      disable-radix-cache: true
+
+      # Other flags
+      # stream-interval: 50
+      max-running-requests: 16
+
+
+      # Disaggregation (prefill side)
+      disaggregation-bootstrap-port: 30001
+      disaggregation-mode: "prefill"
+      disaggregation-transfer-backend: "nixl"
+
+      # Memory and token limits (prefill token budget equals the chunk size)
+      mem-fraction-static: 0.82
+      max-prefill-tokens: 32768
+      chunked-prefill-size: 32768
+
+      # Request handling
+      load-balance-method: "round_robin"
+
+    decode:
+      # Model configuration
+      served-model-name: "deepseek-ai/DeepSeek-R1"
+      model-path: "/model/"
+      skip-tokenizer-init: true
+      trust-remote-code: true
+      watchdog-timeout: 1000000
+
+      # Parallelism (tensor parallel only: dp-size and ep-size are 1)
+      tp-size: 8
+      dp-size: 1
+      ep-size: 1
+
+      # KV cache and attention
+      attention-backend: "flashinfer"
+
+      # Other flags
+      disable-radix-cache: true
+      stream-interval: 10
+
+      # Disaggregation (decode side); port and transfer backend match the prefill section
+      disaggregation-bootstrap-port: 30001
+      disaggregation-mode: "decode"
+      disaggregation-transfer-backend: "nixl"
+
+      # Memory and batch limits (CUDA graph batch size matches max-running-requests)
+      mem-fraction-static: 0.75
+      max-running-requests: 2
+      cuda-graph-max-bs: 2
+
+      # MTP settings; NOTE(review): prefill sets no speculative-* flags — confirm intended
+      speculative-algorithm: "EAGLE"
+      speculative-num-steps: 2
+      speculative-eagle-topk: 1
+      speculative-num-draft-tokens: 3
+
+benchmark:
+  type: "sa-bench"
+  isl: 8192  # presumably input sequence length in tokens (the "8k" of the 8k1k directory) — confirm
+  osl: 1024  # presumably output sequence length in tokens — confirm
+  concurrencies: "1x4x8"  # looks like an "x"-separated sweep of concurrency levels — confirm
+  req_rate: "inf"
diff --git a/recipes/h200/8k1k/bs64-2p3d-mtp.yaml b/recipes/h200/8k1k/bs64-2p3d-mtp.yaml
new file mode 100644
index 00000000..75535b80
--- /dev/null
+++ b/recipes/h200/8k1k/bs64-2p3d-mtp.yaml
@@ -0,0 +1,122 @@
+name: "bs64-2p3d-h200-fp8-mtp"
+
+model:
+  path: "dsfp8"
+  container: "lmsysorg/sglang:v0.5.8-cu130-runtime"  # SGLang image pinned to an explicit tag
+  precision: "fp8"
+
+resources:
+  gpu_type: "h200"
+  prefill_nodes: 2
+  prefill_workers: 2  # one prefill worker per prefill node
+  decode_nodes: 3
+  decode_workers: 3  # one decode worker per decode node
+  gpus_per_node: 8  # equals tp-size: 8 used by both prefill and decode
+
+backend:
+
+  # Prefill-specific environment variables
+  prefill_environment:
+    SGLANG_ENABLE_SPEC_V2: "1"
+    SGLANG_JIT_DEEPGEMM_FAST_WARMUP: "1"
+    SGLANG_DISAGGREGATION_HEARTBEAT_MAX_FAILURE: "100000"
+    SGLANG_DISAGGREGATION_BOOTSTRAP_TIMEOUT: "100000"
+    SGLANG_DISAGGREGATION_WAITING_TIMEOUT: "100000"
+
+  # Decode-specific environment variables (same values as the prefill block)
+  decode_environment:
+    SGLANG_ENABLE_SPEC_V2: "1"
+    SGLANG_JIT_DEEPGEMM_FAST_WARMUP: "1"
+    SGLANG_DISAGGREGATION_HEARTBEAT_MAX_FAILURE: "100000"
+    SGLANG_DISAGGREGATION_BOOTSTRAP_TIMEOUT: "100000"
+    SGLANG_DISAGGREGATION_WAITING_TIMEOUT: "100000"
+
+  sglang_config:
+    prefill:
+      # Model configuration
+      served-model-name: "deepseek-ai/DeepSeek-R1"
+      model-path: "/model/"
+      skip-tokenizer-init: true
+      trust-remote-code: true
+      watchdog-timeout: 1000000
+
+      # Parallelism (tensor parallel only: dp-size and ep-size are 1)
+      tp-size: 8
+      dp-size: 1
+      ep-size: 1
+
+      # KV cache and attention
+      attention-backend: "flashinfer"
+
+      # Radix cache disabled
+      disable-radix-cache: true
+
+      # Other flags
+      # stream-interval: 50
+      max-running-requests: 16
+
+
+      # Disaggregation (prefill side)
+      disaggregation-bootstrap-port: 30001
+      disaggregation-mode: "prefill"
+      disaggregation-transfer-backend: "nixl"
+
+      # Memory and token limits (prefill token budget equals the chunk size)
+      mem-fraction-static: 0.82
+      max-prefill-tokens: 32768
+      chunked-prefill-size: 32768
+
+      # Request handling
+      load-balance-method: "round_robin"
+
+    decode:
+      # Model configuration
+      served-model-name: "deepseek-ai/DeepSeek-R1"
+      model-path: "/model/"
+      skip-tokenizer-init: true
+      trust-remote-code: true
+      watchdog-timeout: 1000000
+
+      # Parallelism (tensor parallel only: dp-size and ep-size are 1)
+      tp-size: 8
+      dp-size: 1
+      ep-size: 1
+
+      # KV cache and attention
+      attention-backend: "flashinfer"
+
+      # Other flags
+      disable-radix-cache: true
+      stream-interval: 10
+
+      # Disaggregation (decode side); port and transfer backend match the prefill section
+      disaggregation-bootstrap-port: 30001
+      disaggregation-mode: "decode"
+      disaggregation-transfer-backend: "nixl"
+
+      # Context, memory and batch limits (CUDA graph batch size matches max-running-requests)
+      context-length: 72000
+      max-total-tokens: 128000
+      mem-fraction-static: 0.75
+      max-running-requests: 16
+      cuda-graph-max-bs: 16
+
+      # MTP settings; NOTE(review): prefill sets no speculative-* flags — confirm intended
+      speculative-algorithm: "EAGLE"
+      speculative-num-steps: 2
+      speculative-eagle-topk: 1
+      speculative-num-draft-tokens: 3
+
+benchmark:
+  type: "sa-bench"
+  isl: 8192  # presumably input sequence length in tokens (the "8k" of the 8k1k directory) — confirm
+  osl: 1024  # presumably output sequence length in tokens — confirm
+  concurrencies: "32x64x128"  # looks like an "x"-separated sweep of concurrency levels — confirm
+  req_rate: "inf"
+# Disabled gpqa alternative: swap it in for the block above rather than adding a second `benchmark` key.
+# benchmark:
+#   type: "gpqa"
+#   num_examples: 198
+#   repeat: 4
+#   num_threads: 32
+#   max_tokens: 64000
diff --git a/recipes/h200/8k1k/bs8-1p6d-mtp.yaml b/recipes/h200/8k1k/bs8-1p6d-mtp.yaml
new file mode 100644
index 00000000..d3d61d70
--- /dev/null
+++ b/recipes/h200/8k1k/bs8-1p6d-mtp.yaml
@@ -0,0 +1,114 @@
+name: "bs8-1p6d-h200-fp8-mtp"
+
+model:
+  path: "dsfp8"
+  container: "lmsysorg/sglang:v0.5.8-cu130-runtime"  # SGLang image pinned to an explicit tag
+  precision: "fp8"
+
+resources:
+  gpu_type: "h200"
+  prefill_nodes: 1
+  prefill_workers: 1  # one prefill worker per prefill node
+  decode_nodes: 6
+  decode_workers: 6  # one decode worker per decode node
+  gpus_per_node: 8  # equals tp-size: 8 used by both prefill and decode
+
+backend:
+
+  # Prefill-specific environment variables
+  prefill_environment:
+    SGLANG_ENABLE_SPEC_V2: "1"
+    SGLANG_JIT_DEEPGEMM_FAST_WARMUP: "1"
+    SGLANG_DISAGGREGATION_HEARTBEAT_MAX_FAILURE: "100000"
+    SGLANG_DISAGGREGATION_BOOTSTRAP_TIMEOUT: "100000"
+    SGLANG_DISAGGREGATION_WAITING_TIMEOUT: "100000"
+
+  # Decode-specific environment variables (same values as the prefill block)
+  decode_environment:
+    SGLANG_ENABLE_SPEC_V2: "1"
+    SGLANG_JIT_DEEPGEMM_FAST_WARMUP: "1"
+    SGLANG_DISAGGREGATION_HEARTBEAT_MAX_FAILURE: "100000"
+    SGLANG_DISAGGREGATION_BOOTSTRAP_TIMEOUT: "100000"
+    SGLANG_DISAGGREGATION_WAITING_TIMEOUT: "100000"
+
+  sglang_config:
+    prefill:
+      # Model configuration
+      served-model-name: "deepseek-ai/DeepSeek-R1"
+      model-path: "/model/"
+      skip-tokenizer-init: true
+      trust-remote-code: true
+      watchdog-timeout: 1000000
+
+      # Parallelism (tensor parallel only: dp-size and ep-size are 1)
+      tp-size: 8
+      dp-size: 1
+      ep-size: 1
+
+      # KV cache and attention
+      attention-backend: "flashinfer"
+
+      # Radix cache disabled
+      disable-radix-cache: true
+
+      # Other flags
+      # stream-interval: 50
+      max-running-requests: 16
+
+
+      # Disaggregation (prefill side)
+      disaggregation-bootstrap-port: 30001
+      disaggregation-mode: "prefill"
+      disaggregation-transfer-backend: "nixl"
+
+      # Memory and token limits (prefill token budget equals the chunk size)
+      mem-fraction-static: 0.82
+      max-prefill-tokens: 32768
+      chunked-prefill-size: 32768
+
+      # Request handling
+      load-balance-method: "round_robin"
+
+
+    decode:
+      # Model configuration
+      served-model-name: "deepseek-ai/DeepSeek-R1"
+      model-path: "/model/"
+      skip-tokenizer-init: true
+      trust-remote-code: true
+      watchdog-timeout: 1000000
+
+      # Parallelism (tensor parallel only: dp-size and ep-size are 1)
+      tp-size: 8
+      dp-size: 1
+      ep-size: 1
+
+      # KV cache and attention
+      attention-backend: "flashinfer"
+
+      # Other flags
+      disable-radix-cache: true
+      stream-interval: 10
+
+      # Disaggregation (decode side); port and transfer backend match the prefill section
+      disaggregation-bootstrap-port: 30001
+      disaggregation-mode: "decode"
+      disaggregation-transfer-backend: "nixl"
+
+      # Memory and batch limits (CUDA graph batch size matches max-running-requests)
+      mem-fraction-static: 0.82
+      max-running-requests: 16
+      cuda-graph-max-bs: 16
+
+      # MTP settings; NOTE(review): prefill sets no speculative-* flags — confirm intended
+      speculative-algorithm: "EAGLE"
+      speculative-num-steps: 2
+      speculative-eagle-topk: 1
+      speculative-num-draft-tokens: 3
+
+benchmark:
+  type: "sa-bench"
+  isl: 8192  # presumably input sequence length in tokens (the "8k" of the 8k1k directory) — confirm
+  osl: 1024  # presumably output sequence length in tokens — confirm
+  concurrencies: "2x4x8x16x32"  # looks like an "x"-separated sweep of concurrency levels — confirm
+  req_rate: "inf"