diff --git a/.github/configs/nvidia-master.yaml b/.github/configs/nvidia-master.yaml
index e99282490..f12b586f4 100644
--- a/.github/configs/nvidia-master.yaml
+++ b/.github/configs/nvidia-master.yaml
@@ -1724,9 +1724,8 @@ gptoss-fp4-h200-vllm:
     - { tp: 8, conc-start: 4, conc-end: 32 }
 
 dsr1-fp4-gb200-dynamo-trt:
-  image: nvcr.io#nvidia/ai-dynamo/tensorrtllm-runtime:0.5.1-rc0.pre3
-  # Models are pre-downloaded to this path on GB200 runner to avoid repeated downloading
-  model: /mnt/lustre01/models/deepseek-r1-0528-fp4-v2
+  image: nvcr.io/nvidia/ai-dynamo/tensorrtllm-runtime:0.8.1.post2
+  model: nvidia/DeepSeek-R1-0528-NVFP4-v2
   model-prefix: dsr1
   runner: gb200
   precision: fp4
@@ -1737,440 +1736,563 @@ dsr1-fp4-gb200-dynamo-trt:
   - isl: 1024
     osl: 1024
     search-space:
-    # MTP configurations
-    # tep - Run Tensor-Expert Parallel mode (attention_dp=false)
-    # NOTE: Prefill tp and ep are always 4 because each GB200 node has 4 GPUs and
-    # ctx_tp_size is hardcoded to 4 in launch_gb200-nv.sh. Decode tp/ep matches gen_tp_size.
-    # For 1k/1k: prefill batch-size=4, max-num-tokens=4608
+    # MTP configurations (spec_decoding="mtp")
     - spec-decoding: "mtp"
-      conc-list: [ 1, 2, 4, 8, 16, 36 ]
+      conc-list: [ 180 ]
       prefill:
         num-worker: 1
         tp: 4
         ep: 4
-        dp-attn: false
+        dp-attn: true
         additional-settings:
-        - "PREFILL_MAX_NUM_TOKENS=4608"
-        - "PREFILL_MAX_BATCH_SIZE=4"
+        # https://github.com/ishandhanani/srt-slurm/blob/sa-submission-q1-2026/recipes/trtllm/gb200-fp4/1k1k/mtp/ctx1_gen1_dep32_batch4_eplb0_mtp3.yaml
+        - "CONFIG_FILE=recipes/trtllm/gb200-fp4/1k1k/mtp/ctx1_gen1_dep32_batch4_eplb0_mtp3.yaml"
+      decode:
+        num-worker: 1
+        tp: 32
+        ep: 32
+        dp-attn: true
+    - spec-decoding: "mtp"
+      conc-list: [ 4, 8, 12, 24, 48 ]
+      prefill:
+        num-worker: 1
+        tp: 4
+        ep: 4
+        dp-attn: true
+        additional-settings:
+        # https://github.com/ishandhanani/srt-slurm/blob/sa-submission-q1-2026/recipes/trtllm/gb200-fp4/1k1k/mtp/ctx1_gen4_tep8_batch8_eplb0_mtp3.yaml
+        - "CONFIG_FILE=recipes/trtllm/gb200-fp4/1k1k/mtp/ctx1_gen4_tep8_batch8_eplb0_mtp3.yaml"
       decode:
         num-worker: 4
         tp: 8
         ep: 8
         dp-attn: false
-        additional-settings:
-        - "DECODE_MAX_NUM_TOKENS=128"
-        - "DECODE_MAX_BATCH_SIZE=32"
-        - "DECODE_GPU_MEM_FRACTION=0.9"
-        - "DECODE_MTP_SIZE=3"
-
-    # dep - Run Data-Expert Parallel mode (attention_dp=true)
     - spec-decoding: "mtp"
-      conc-list: [ 512, 1075 ]
+      conc-list: [ 4301 ]
       prefill:
-        num-worker: 1
+        num-worker: 2
         tp: 4
         ep: 4
         dp-attn: true
         additional-settings:
-        - "PREFILL_MAX_NUM_TOKENS=4608"
-        - "PREFILL_MAX_BATCH_SIZE=4"
+        # https://github.com/ishandhanani/srt-slurm/blob/sa-submission-q1-2026/recipes/trtllm/gb200-fp4/1k1k/mtp/ctx2_gen1_dep16_batch256_eplb256_mtp1.yaml
+        - "CONFIG_FILE=recipes/trtllm/gb200-fp4/1k1k/mtp/ctx2_gen1_dep16_batch256_eplb256_mtp1.yaml"
       decode:
         num-worker: 1
         tp: 16
         ep: 16
         dp-attn: true
-        additional-settings:
-        - "DECODE_MAX_NUM_TOKENS=256"
-        - "DECODE_MAX_BATCH_SIZE=64"
-        - "DECODE_GPU_MEM_FRACTION=0.7"
-        - "DECODE_MTP_SIZE=3"
-
     - spec-decoding: "mtp"
-      conc-list: [ 2150 ]
+      conc-list: [ 2253 ]
       prefill:
-        num-worker: 2
+        num-worker: 3
         tp: 4
         ep: 4
         dp-attn: true
         additional-settings:
-        - "PREFILL_MAX_NUM_TOKENS=4608"
-        - "PREFILL_MAX_BATCH_SIZE=4"
+        # https://github.com/ishandhanani/srt-slurm/blob/sa-submission-q1-2026/recipes/trtllm/gb200-fp4/1k1k/mtp/ctx3_gen1_dep32_batch64_eplb288_mtp1.yaml
+        - "CONFIG_FILE=recipes/trtllm/gb200-fp4/1k1k/mtp/ctx3_gen1_dep32_batch64_eplb288_mtp1.yaml"
       decode:
         num-worker: 1
-        tp: 16
-        ep: 16
+        tp: 32
+        ep: 32
+        dp-attn: true
+    - spec-decoding: "mtp"
+      conc-list: [ 16130 ]
+      prefill:
+        num-worker: 3
+        tp: 4
+        ep: 4
         dp-attn: true
         additional-settings:
-        - "DECODE_MAX_NUM_TOKENS=256"
-        - "DECODE_MAX_BATCH_SIZE=128"
-        - "DECODE_GPU_MEM_FRACTION=0.7"
-        - "DECODE_MTP_SIZE=1"
+        # https://github.com/ishandhanani/srt-slurm/blob/sa-submission-q1-2026/recipes/trtllm/gb200-fp4/1k1k/mtp/ctx3_gen5_dep4_batch768_eplb0_mtp1.yaml
+        - "CONFIG_FILE=recipes/trtllm/gb200-fp4/1k1k/mtp/ctx3_gen5_dep4_batch768_eplb0_mtp1.yaml"
+      decode:
+        num-worker: 5
+        tp: 4
+        ep: 4
+        dp-attn: true
 
-    - spec-decoding: "mtp"
-      conc-list: [ 512 ]
+
+    # Non-MTP configurations (default spec_decoding="none")
+    - conc-list: [ 4301 ]
+      prefill:
+        num-worker: 1
+        tp: 4
+        ep: 4
+        dp-attn: true
+        additional-settings:
+        # https://github.com/ishandhanani/srt-slurm/blob/sa-submission-q1-2026/recipes/trtllm/gb200-fp4/1k1k/stp/ctx1_gen1_dep8_batch512_eplb0_mtp0.yaml
+        - "CONFIG_FILE=recipes/trtllm/gb200-fp4/1k1k/stp/ctx1_gen1_dep8_batch512_eplb0_mtp0.yaml"
+      decode:
+        num-worker: 1
+        tp: 8
+        ep: 8
+        dp-attn: true
+    - conc-list: [ 666 ]
       prefill:
         num-worker: 1
         tp: 4
         ep: 4
         dp-attn: true
         additional-settings:
-        - "PREFILL_MAX_NUM_TOKENS=4608"
-        - "PREFILL_MAX_BATCH_SIZE=4"
+        # https://github.com/ishandhanani/srt-slurm/blob/sa-submission-q1-2026/recipes/trtllm/gb200-fp4/1k1k/stp/ctx1_gen1_dep32_batch16_eplb0_mtp0.yaml
+        - "CONFIG_FILE=recipes/trtllm/gb200-fp4/1k1k/stp/ctx1_gen1_dep32_batch16_eplb0_mtp0.yaml"
       decode:
         num-worker: 1
         tp: 32
         ep: 32
         dp-attn: true
+    - conc-list: [ 6144 ]
+      prefill:
+        num-worker: 1
+        tp: 4
+        ep: 4
+        dp-attn: true
         additional-settings:
-        - "DECODE_MAX_NUM_TOKENS=64"
-        - "DECODE_MAX_BATCH_SIZE=16"
-        - "DECODE_GPU_MEM_FRACTION=0.6"
-        - "DECODE_MTP_SIZE=3"
-
-    - spec-decoding: "mtp"
-      conc-list: [ 2252 ]
+        # https://github.com/ishandhanani/srt-slurm/blob/sa-submission-q1-2026/recipes/trtllm/gb200-fp4/1k1k/stp/ctx1_gen2_dep4_batch768_eplb0_mtp0.yaml
+        - "CONFIG_FILE=recipes/trtllm/gb200-fp4/1k1k/stp/ctx1_gen2_dep4_batch768_eplb0_mtp0.yaml"
+      decode:
+        num-worker: 2
+        tp: 4
+        ep: 4
+        dp-attn: true
+    - conc-list: [ 12, 24, 48, 96, 192 ]
       prefill:
         num-worker: 1
         tp: 4
         ep: 4
         dp-attn: true
         additional-settings:
-        - "PREFILL_MAX_NUM_TOKENS=4608"
-        - "PREFILL_MAX_BATCH_SIZE=4"
+        # https://github.com/ishandhanani/srt-slurm/blob/sa-submission-q1-2026/recipes/trtllm/gb200-fp4/1k1k/stp/ctx1_gen4_tep8_batch32_eplb0_mtp0.yaml
+        - "CONFIG_FILE=recipes/trtllm/gb200-fp4/1k1k/stp/ctx1_gen4_tep8_batch32_eplb0_mtp0.yaml"
       decode:
+        num-worker: 4
+        tp: 8
+        ep: 8
+        dp-attn: false
+    - conc-list: [ 5 ]
+      prefill:
         num-worker: 1
+        tp: 4
+        ep: 4
+        dp-attn: true
+        additional-settings:
+        # https://github.com/ishandhanani/srt-slurm/blob/sa-submission-q1-2026/recipes/trtllm/gb200-fp4/1k1k/stp/ctx1_gen4_tep8_batch1_eplb0_mtp0.yaml
+        - "CONFIG_FILE=recipes/trtllm/gb200-fp4/1k1k/stp/ctx1_gen4_tep8_batch1_eplb0_mtp0.yaml"
+      decode:
+        num-worker: 4
         tp: 8
         ep: 8
+        dp-attn: false
+    - conc-list: [ 4301 ]
+      prefill:
+        num-worker: 2
+        tp: 4
+        ep: 4
         dp-attn: true
         additional-settings:
-        - "DECODE_MAX_NUM_TOKENS=512"
-        - "DECODE_MAX_BATCH_SIZE=256"
-        - "DECODE_GPU_MEM_FRACTION=0.8"
-        - "DECODE_MTP_SIZE=1"
+        # https://github.com/ishandhanani/srt-slurm/blob/sa-submission-q1-2026/recipes/trtllm/gb200-fp4/1k1k/stp/ctx2_gen1_dep16_batch256_eplb256_mtp0.yaml
+        - "CONFIG_FILE=recipes/trtllm/gb200-fp4/1k1k/stp/ctx2_gen1_dep16_batch256_eplb256_mtp0.yaml"
+      decode:
+        num-worker: 1
+        tp: 16
+        ep: 16
+        dp-attn: true
+    - conc-list: [ 2253 ]
+      prefill:
+        num-worker: 2
+        tp: 4
+        ep: 4
+        dp-attn: true
+        additional-settings:
+        # https://github.com/ishandhanani/srt-slurm/blob/sa-submission-q1-2026/recipes/trtllm/gb200-fp4/1k1k/stp/ctx2_gen1_dep32_batch64_eplb0_mtp0.yaml
+        - "CONFIG_FILE=recipes/trtllm/gb200-fp4/1k1k/stp/ctx2_gen1_dep32_batch64_eplb0_mtp0.yaml"
+      decode:
+        num-worker: 1
+        tp: 32
+        ep: 32
+        dp-attn: true
 
-    # Non-MTP configurations (default spec_decoding="none")
-    # tep - Run Tensor-Expert Parallel mode (attention_dp=false)
-    - conc-list: [ 1, 2, 4, 8, 16, 32, 64, 141 ]
+  - isl: 1024
+    osl: 8192
+    search-space:
+    # MTP configurations (spec_decoding="mtp")
+    - spec-decoding: "mtp"
+      conc-list: [ 4, 8, 12, 24, 48 ]
       prefill:
         num-worker: 1
         tp: 4
         ep: 4
-        dp-attn: false
+        dp-attn: true
         additional-settings:
-        - "PREFILL_MAX_NUM_TOKENS=4608"
-        - "PREFILL_MAX_BATCH_SIZE=4"
+        # https://github.com/ishandhanani/srt-slurm/blob/sa-submission-q1-2026/recipes/trtllm/gb200-fp4/1k8k/mtp/ctx1_gen7_tep8_batch4_eplb0_mtp3.yaml
+        - "CONFIG_FILE=recipes/trtllm/gb200-fp4/1k8k/mtp/ctx1_gen7_tep8_batch4_eplb0_mtp3.yaml"
       decode:
-        num-worker: 4
+        num-worker: 7
         tp: 8
         ep: 8
         dp-attn: false
+    - spec-decoding: "mtp"
+      conc-list: [ 7 ]
+      prefill:
+        num-worker: 1
+        tp: 4
+        ep: 4
+        dp-attn: true
         additional-settings:
-        - "DECODE_MAX_NUM_TOKENS=128"
-        - "DECODE_MAX_BATCH_SIZE=128"
-        - "DECODE_GPU_MEM_FRACTION=0.9"
-        - "DECODE_MTP_SIZE=0"
-
-    # dep - Run Data-Expert Parallel mode (attention_dp=true)
-    - conc-list: [ 1075 ]
+        # https://github.com/ishandhanani/srt-slurm/blob/sa-submission-q1-2026/recipes/trtllm/gb200-fp4/1k8k/mtp/ctx1_gen7_tep8_batch1_eplb0_mtp3.yaml
+        - "CONFIG_FILE=recipes/trtllm/gb200-fp4/1k8k/mtp/ctx1_gen7_tep8_batch1_eplb0_mtp3.yaml"
+      decode:
+        num-worker: 7
+        tp: 8
+        ep: 8
+        dp-attn: false
+    - spec-decoding: "mtp"
+      conc-list: [ 128 ]
       prefill:
         num-worker: 1
         tp: 4
         ep: 4
         dp-attn: true
         additional-settings:
-        - "PREFILL_MAX_NUM_TOKENS=4608"
-        - "PREFILL_MAX_BATCH_SIZE=4"
+        # https://github.com/ishandhanani/srt-slurm/blob/sa-submission-q1-2026/recipes/trtllm/gb200-fp4/1k8k/mtp/ctx1_gen1_dep32_batch4_eplb0_mtp3.yaml
+        - "CONFIG_FILE=recipes/trtllm/gb200-fp4/1k8k/mtp/ctx1_gen1_dep32_batch4_eplb0_mtp3.yaml"
       decode:
         num-worker: 1
         tp: 32
         ep: 32
         dp-attn: true
-        additional-settings:
-        - "DECODE_MAX_NUM_TOKENS=32"
-        - "DECODE_MAX_BATCH_SIZE=32"
-        - "DECODE_GPU_MEM_FRACTION=0.7"
-        - "DECODE_MTP_SIZE=0"
-
-    - conc-list: [ 1075 ]
+    - spec-decoding: "mtp"
+      conc-list: [ 512 ]
       prefill:
         num-worker: 1
         tp: 4
         ep: 4
         dp-attn: true
         additional-settings:
-        - "PREFILL_MAX_NUM_TOKENS=4608"
-        - "PREFILL_MAX_BATCH_SIZE=4"
+        # https://github.com/ishandhanani/srt-slurm/blob/sa-submission-q1-2026/recipes/trtllm/gb200-fp4/1k8k/mtp/ctx1_gen1_dep32_batch16_eplb0_mtp3.yaml
+        - "CONFIG_FILE=recipes/trtllm/gb200-fp4/1k8k/mtp/ctx1_gen1_dep32_batch16_eplb0_mtp3.yaml"
       decode:
         num-worker: 1
+        tp: 32
+        ep: 32
+        dp-attn: true
+    - spec-decoding: "mtp"
+      conc-list: [ 3072 ]
+      prefill:
+        num-worker: 1
+        tp: 4
+        ep: 4
+        dp-attn: true
+        additional-settings:
+        # https://github.com/ishandhanani/srt-slurm/blob/sa-submission-q1-2026/recipes/trtllm/gb200-fp4/1k8k/mtp/ctx1_gen3_dep16_batch64_eplb0_mtp3.yaml
+        - "CONFIG_FILE=recipes/trtllm/gb200-fp4/1k8k/mtp/ctx1_gen3_dep16_batch64_eplb0_mtp3.yaml"
+      decode:
+        num-worker: 3
         tp: 16
         ep: 16
         dp-attn: true
-        additional-settings:
-        - "DECODE_MAX_NUM_TOKENS=64"
-        - "DECODE_MAX_BATCH_SIZE=64"
-        - "DECODE_GPU_MEM_FRACTION=0.75"
-        - "DECODE_MTP_SIZE=0"
-
-    - conc-list: [ 2048, 4300 ]
+    - spec-decoding: "mtp"
+      conc-list: [ 6144 ]
       prefill:
-        num-worker: 2
+        num-worker: 1
         tp: 4
         ep: 4
         dp-attn: true
         additional-settings:
-        - "PREFILL_MAX_NUM_TOKENS=4608"
-        - "PREFILL_MAX_BATCH_SIZE=4"
+        # https://github.com/ishandhanani/srt-slurm/blob/sa-submission-q1-2026/recipes/trtllm/gb200-fp4/1k8k/mtp/ctx1_gen3_dep16_batch128_eplb0_mtp1.yaml
+        - "CONFIG_FILE=recipes/trtllm/gb200-fp4/1k8k/mtp/ctx1_gen3_dep16_batch128_eplb0_mtp1.yaml"
       decode:
-        num-worker: 1
+        num-worker: 3
         tp: 16
         ep: 16
         dp-attn: true
+    - spec-decoding: "mtp"
+      conc-list: [ 8192 ]
+      prefill:
+        num-worker: 1
+        tp: 4
+        ep: 4
+        dp-attn: true
         additional-settings:
-        - "DECODE_MAX_NUM_TOKENS=256"
-        - "DECODE_MAX_BATCH_SIZE=256"
-        - "DECODE_GPU_MEM_FRACTION=0.75"
-        - "DECODE_MTP_SIZE=0"
+        # https://github.com/ishandhanani/srt-slurm/blob/sa-submission-q1-2026/recipes/trtllm/gb200-fp4/1k8k/mtp/ctx1_gen1_dep32_batch256_eplb288_mtp1.yaml
+        - "CONFIG_FILE=recipes/trtllm/gb200-fp4/1k8k/mtp/ctx1_gen1_dep32_batch256_eplb288_mtp1.yaml"
+      decode:
+        num-worker: 1
+        tp: 32
+        ep: 32
+        dp-attn: true
 
-    - conc-list: [ 4300 ]
+    # Non-MTP configurations (default spec_decoding="none")
+    - conc-list: [ 5 ]
       prefill:
         num-worker: 1
         tp: 4
         ep: 4
         dp-attn: true
         additional-settings:
-        - "PREFILL_MAX_NUM_TOKENS=4608"
-        - "PREFILL_MAX_BATCH_SIZE=4"
+        # https://github.com/ishandhanani/srt-slurm/blob/sa-submission-q1-2026/recipes/trtllm/gb200-fp4/1k8k/stp/ctx1_gen7_tep8_batch1_eplb0_mtp0.yaml
+        - "CONFIG_FILE=recipes/trtllm/gb200-fp4/1k8k/stp/ctx1_gen7_tep8_batch1_eplb0_mtp0.yaml"
       decode:
-        num-worker: 1
+        num-worker: 7
         tp: 8
         ep: 8
+        dp-attn: false
+    - conc-list: [ 60 ]
+      prefill:
+        num-worker: 1
+        tp: 4
+        ep: 4
         dp-attn: true
         additional-settings:
-        - "DECODE_MAX_NUM_TOKENS=512"
-        - "DECODE_MAX_BATCH_SIZE=512"
-        - "DECODE_GPU_MEM_FRACTION=0.8"
-        - "DECODE_MTP_SIZE=0"
+        # https://github.com/ishandhanani/srt-slurm/blob/sa-submission-q1-2026/recipes/trtllm/gb200-fp4/1k8k/stp/ctx1_gen15_tep4_batch4_eplb0_mtp0.yaml
+        - "CONFIG_FILE=recipes/trtllm/gb200-fp4/1k8k/stp/ctx1_gen15_tep4_batch4_eplb0_mtp0.yaml"
+      decode:
+        num-worker: 15
+        tp: 4
+        ep: 4
+        dp-attn: false
+    - conc-list: [ 135 ]
+      prefill:
+        num-worker: 1
+        tp: 4
+        ep: 4
+        dp-attn: true
+        additional-settings:
+        # https://github.com/ishandhanani/srt-slurm/blob/sa-submission-q1-2026/recipes/trtllm/gb200-fp4/1k8k/stp/ctx1_gen15_tep4_batch8_eplb0_mtp0.yaml
+        - "CONFIG_FILE=recipes/trtllm/gb200-fp4/1k8k/stp/ctx1_gen15_tep4_batch8_eplb0_mtp0.yaml"
+      decode:
+        num-worker: 15
+        tp: 4
+        ep: 4
+        dp-attn: false
+    - conc-list: [ 563 ]
+      prefill:
+        num-worker: 1
+        tp: 4
+        ep: 4
+        dp-attn: true
+        additional-settings:
+        # https://github.com/ishandhanani/srt-slurm/blob/sa-submission-q1-2026/recipes/trtllm/gb200-fp4/1k8k/stp/ctx1_gen1_dep32_batch16_eplb0_mtp0.yaml
+        - "CONFIG_FILE=recipes/trtllm/gb200-fp4/1k8k/stp/ctx1_gen1_dep32_batch16_eplb0_mtp0.yaml"
+      decode:
+        num-worker: 1
+        tp: 32
+        ep: 32
+        dp-attn: true
+    - conc-list: [ 2048 ]
+      prefill:
+        num-worker: 1
+        tp: 4
+        ep: 4
+        dp-attn: true
+        additional-settings:
+        # https://github.com/ishandhanani/srt-slurm/blob/sa-submission-q1-2026/recipes/trtllm/gb200-fp4/1k8k/stp/ctx1_gen1_dep32_batch64_eplb0_mtp0.yaml
+        - "CONFIG_FILE=recipes/trtllm/gb200-fp4/1k8k/stp/ctx1_gen1_dep32_batch64_eplb0_mtp0.yaml"
+      decode:
+        num-worker: 1
+        tp: 32
+        ep: 32
+        dp-attn: true
+    - conc-list: [ 4096 ]
+      prefill:
+        num-worker: 1
+        tp: 4
+        ep: 4
+        dp-attn: true
+        additional-settings:
+        # https://github.com/ishandhanani/srt-slurm/blob/sa-submission-q1-2026/recipes/trtllm/gb200-fp4/1k8k/stp/ctx1_gen1_dep32_batch128_eplb288_mtp0.yaml
+        - "CONFIG_FILE=recipes/trtllm/gb200-fp4/1k8k/stp/ctx1_gen1_dep32_batch128_eplb288_mtp0.yaml"
+      decode:
+        num-worker: 1
+        tp: 32
+        ep: 32
+        dp-attn: true
+    - conc-list: [ 8192 ]
+      prefill:
+        num-worker: 1
+        tp: 4
+        ep: 4
+        dp-attn: true
+        additional-settings:
+        # https://github.com/ishandhanani/srt-slurm/blob/sa-submission-q1-2026/recipes/trtllm/gb200-fp4/1k8k/stp/ctx1_gen1_dep32_batch256_eplb288_mtp0.yaml
+        - "CONFIG_FILE=recipes/trtllm/gb200-fp4/1k8k/stp/ctx1_gen1_dep32_batch256_eplb288_mtp0.yaml"
+      decode:
+        num-worker: 1
+        tp: 32
+        ep: 32
+        dp-attn: true
 
   - isl: 8192
     osl: 1024
     search-space:
     # MTP configurations (spec_decoding="mtp")
-    # tep - Run Tensor-Expert Parallel mode (attention_dp=false)
-    # For 8k/1k: prefill batch-size=1, max-num-tokens=8448
     - spec-decoding: "mtp"
-      conc-list: [ 1, 2, 4, 8, 18 ]
+      conc-list: [ 4, 8, 12, 24, 48 ]
       prefill:
         num-worker: 1
         tp: 4
         ep: 4
-        dp-attn: false
+        dp-attn: true
         additional-settings:
-        - "PREFILL_MAX_NUM_TOKENS=8448"
-        - "PREFILL_MAX_BATCH_SIZE=1"
+        # https://github.com/ishandhanani/srt-slurm/blob/sa-submission-q1-2026/recipes/trtllm/gb200-fp4/8k1k/mtp/ctx1_gen4_tep8_batch8_eplb0_mtp3.yaml
+        - "CONFIG_FILE=recipes/trtllm/gb200-fp4/8k1k/mtp/ctx1_gen4_tep8_batch8_eplb0_mtp3.yaml"
       decode:
-        num-worker: 3
+        num-worker: 4
         tp: 8
         ep: 8
         dp-attn: false
-        additional-settings:
-        - "DECODE_MAX_NUM_TOKENS=64"
-        - "DECODE_MAX_BATCH_SIZE=16"
-        - "DECODE_GPU_MEM_FRACTION=0.9"
-        - "DECODE_MTP_SIZE=3"
-
-    # dep - Run Data-Expert Parallel mode (attention_dp=true)
     - spec-decoding: "mtp"
-      conc-list: [ 128, 269 ]
+      conc-list: [ 180 ]
       prefill:
-        num-worker: 5
+        num-worker: 3
         tp: 4
         ep: 4
         dp-attn: true
         additional-settings:
-        - "PREFILL_MAX_NUM_TOKENS=8448"
-        - "PREFILL_MAX_BATCH_SIZE=1"
+        # https://github.com/ishandhanani/srt-slurm/blob/sa-submission-q1-2026/recipes/trtllm/gb200-fp4/8k1k/mtp/ctx3_gen1_dep32_batch4_eplb0_mtp3.yaml
+        - "CONFIG_FILE=recipes/trtllm/gb200-fp4/8k1k/mtp/ctx3_gen1_dep32_batch4_eplb0_mtp3.yaml"
       decode:
         num-worker: 1
         tp: 32
         ep: 32
         dp-attn: true
+    - spec-decoding: "mtp"
+      conc-list: [ 1229 ]
+      prefill:
+        num-worker: 7
+        tp: 4
+        ep: 4
+        dp-attn: true
         additional-settings:
-        - "DECODE_MAX_NUM_TOKENS=32"
-        - "DECODE_MAX_BATCH_SIZE=8"
-        - "DECODE_GPU_MEM_FRACTION=0.7"
-        - "DECODE_MTP_SIZE=3"
-
+        # https://github.com/ishandhanani/srt-slurm/blob/sa-submission-q1-2026/recipes/trtllm/gb200-fp4/8k1k/mtp/ctx7_gen1_dep16_batch64_eplb256_mtp1.yaml
+        - "CONFIG_FILE=recipes/trtllm/gb200-fp4/8k1k/mtp/ctx7_gen1_dep16_batch64_eplb256_mtp1.yaml"
+      decode:
+        num-worker: 1
+        tp: 16
+        ep: 16
+        dp-attn: true
     - spec-decoding: "mtp"
-      conc-list: [ 538 ]
+      conc-list: [ 666 ]
       prefill:
         num-worker: 8
         tp: 4
         ep: 4
         dp-attn: true
         additional-settings:
-        - "PREFILL_MAX_NUM_TOKENS=8448"
-        - "PREFILL_MAX_BATCH_SIZE=1"
+        # https://github.com/ishandhanani/srt-slurm/blob/sa-submission-q1-2026/recipes/trtllm/gb200-fp4/8k1k/mtp/ctx8_gen1_dep32_batch16_eplb0_mtp3.yaml
+        - "CONFIG_FILE=recipes/trtllm/gb200-fp4/8k1k/mtp/ctx8_gen1_dep32_batch16_eplb0_mtp3.yaml"
       decode:
         num-worker: 1
         tp: 32
         ep: 32
         dp-attn: true
-        additional-settings:
-        - "DECODE_MAX_NUM_TOKENS=64"
-        - "DECODE_MAX_BATCH_SIZE=16"
-        - "DECODE_GPU_MEM_FRACTION=0.7"
-        - "DECODE_MTP_SIZE=3"
-
     - spec-decoding: "mtp"
-      conc-list: [ 1075 ]
+      conc-list: [ 4301 ]
       prefill:
-        num-worker: 8
+        num-worker: 11
         tp: 4
         ep: 4
         dp-attn: true
         additional-settings:
-        - "PREFILL_MAX_NUM_TOKENS=8448"
-        - "PREFILL_MAX_BATCH_SIZE=1"
+        # https://github.com/ishandhanani/srt-slurm/blob/sa-submission-q1-2026/recipes/trtllm/gb200-fp4/8k1k/mtp/ctx11_gen1_dep16_batch256_eplb256_mtp1.yaml
+        - "CONFIG_FILE=recipes/trtllm/gb200-fp4/8k1k/mtp/ctx11_gen1_dep16_batch256_eplb256_mtp1.yaml"
       decode:
         num-worker: 1
         tp: 16
         ep: 16
         dp-attn: true
-        additional-settings:
-        - "DECODE_MAX_NUM_TOKENS=256"
-        - "DECODE_MAX_BATCH_SIZE=64"
-        - "DECODE_GPU_MEM_FRACTION=0.75"
-        - "DECODE_MTP_SIZE=2"
 
-    - spec-decoding: "mtp"
-      conc-list: [ 2150 ]
+    # Non-MTP configurations (default spec_decoding="none")
+    - conc-list: [ 12, 44, 76 ]
       prefill:
-        num-worker: 6
+        num-worker: 1
         tp: 4
         ep: 4
         dp-attn: true
         additional-settings:
-        - "PREFILL_MAX_NUM_TOKENS=8448"
-        - "PREFILL_MAX_BATCH_SIZE=1"
+        # https://github.com/ishandhanani/srt-slurm/blob/sa-submission-q1-2026/recipes/trtllm/gb200-fp4/8k1k/stp/ctx1_gen4_tep8_batch16_eplb0_mtp0.yaml
+        - "CONFIG_FILE=recipes/trtllm/gb200-fp4/8k1k/stp/ctx1_gen4_tep8_batch16_eplb0_mtp0.yaml"
       decode:
-        num-worker: 1
+        num-worker: 4
         tp: 8
         ep: 8
-        dp-attn: true
+        dp-attn: false
-        additional-settings:
-        - "DECODE_MAX_NUM_TOKENS=512"
-        - "DECODE_MAX_BATCH_SIZE=256"
-        - "DECODE_GPU_MEM_FRACTION=0.8"
-        - "DECODE_MTP_SIZE=1"
-
-    # Non-MTP configurations (default spec_decoding="none")
-    # tep - Run Tensor-Expert Parallel mode (attention_dp=false)
-    - conc-list: [ 1, 2, 4, 8, 16, 34 ]
+    - conc-list: [ 5 ]
       prefill:
         num-worker: 1
         tp: 4
         ep: 4
-        dp-attn: false
+        dp-attn: true
         additional-settings:
-        - "PREFILL_MAX_NUM_TOKENS=8448"
-        - "PREFILL_MAX_BATCH_SIZE=1"
+        # https://github.com/ishandhanani/srt-slurm/blob/sa-submission-q1-2026/recipes/trtllm/gb200-fp4/8k1k/stp/ctx1_gen4_tep8_batch1_eplb0_mtp0.yaml
+        - "CONFIG_FILE=recipes/trtllm/gb200-fp4/8k1k/stp/ctx1_gen4_tep8_batch1_eplb0_mtp0.yaml"
       decode:
-        num-worker: 3
+        num-worker: 4
         tp: 8
         ep: 8
         dp-attn: false
-        additional-settings:
-        - "DECODE_MAX_NUM_TOKENS=32"
-        - "DECODE_MAX_BATCH_SIZE=32"
-        - "DECODE_GPU_MEM_FRACTION=0.9"
-        - "DECODE_MTP_SIZE=0"
-
-    # dep - Run Data-Expert Parallel mode (attention_dp=true)
-    - conc-list: [ 256, 538 ]
+    - conc-list: [ 333 ]
       prefill:
-        num-worker: 4
+        num-worker: 2
         tp: 4
         ep: 4
         dp-attn: true
         additional-settings:
-        - "PREFILL_MAX_NUM_TOKENS=8448"
-        - "PREFILL_MAX_BATCH_SIZE=1"
+        # https://github.com/ishandhanani/srt-slurm/blob/sa-submission-q1-2026/recipes/trtllm/gb200-fp4/8k1k/stp/ctx2_gen1_dep32_batch8_eplb0_mtp0.yaml
+        - "CONFIG_FILE=recipes/trtllm/gb200-fp4/8k1k/stp/ctx2_gen1_dep32_batch8_eplb0_mtp0.yaml"
       decode:
         num-worker: 1
         tp: 32
         ep: 32
         dp-attn: true
-        additional-settings:
-        - "DECODE_MAX_NUM_TOKENS=16"
-        - "DECODE_MAX_BATCH_SIZE=16"
-        - "DECODE_GPU_MEM_FRACTION=0.7"
-        - "DECODE_MTP_SIZE=0"
-
-    - conc-list: [ 1075 ]
+    - conc-list: [ 1229 ]
       prefill:
-        num-worker: 6
+        num-worker: 7
         tp: 4
         ep: 4
         dp-attn: true
         additional-settings:
-        - "PREFILL_MAX_NUM_TOKENS=8448"
-        - "PREFILL_MAX_BATCH_SIZE=1"
+        # https://github.com/ishandhanani/srt-slurm/blob/sa-submission-q1-2026/recipes/trtllm/gb200-fp4/8k1k/stp/ctx7_gen1_dep32_batch32_eplb0_mtp0.yaml
+        - "CONFIG_FILE=recipes/trtllm/gb200-fp4/8k1k/stp/ctx7_gen1_dep32_batch32_eplb0_mtp0.yaml"
       decode:
         num-worker: 1
-        tp: 16
-        ep: 16
+        tp: 32
+        ep: 32
         dp-attn: true
-        additional-settings:
-        - "DECODE_MAX_NUM_TOKENS=64"
-        - "DECODE_MAX_BATCH_SIZE=64"
-        - "DECODE_GPU_MEM_FRACTION=0.75"
-        - "DECODE_MTP_SIZE=0"
-
-    - conc-list: [ 2150 ]
+    - conc-list: [ 2253 ]
       prefill:
         num-worker: 8
         tp: 4
         ep: 4
         dp-attn: true
         additional-settings:
-        - "PREFILL_MAX_NUM_TOKENS=8448"
-        - "PREFILL_MAX_BATCH_SIZE=1"
+        # https://github.com/ishandhanani/srt-slurm/blob/sa-submission-q1-2026/recipes/trtllm/gb200-fp4/8k1k/stp/ctx8_gen1_dep16_batch128_eplb0_mtp0.yaml
+        - "CONFIG_FILE=recipes/trtllm/gb200-fp4/8k1k/stp/ctx8_gen1_dep16_batch128_eplb0_mtp0.yaml"
       decode:
         num-worker: 1
         tp: 16
         ep: 16
         dp-attn: true
-        additional-settings:
-        - "DECODE_MAX_NUM_TOKENS=128"
-        - "DECODE_MAX_BATCH_SIZE=128"
-        - "DECODE_GPU_MEM_FRACTION=0.75"
-        - "DECODE_MTP_SIZE=0"
-
-    - conc-list: [ 2150 ]
+    - conc-list: [ 4096 ]
       prefill:
-        num-worker: 5
+        num-worker: 10
         tp: 4
         ep: 4
         dp-attn: true
         additional-settings:
-        - "PREFILL_MAX_NUM_TOKENS=8448"
-        - "PREFILL_MAX_BATCH_SIZE=1"
+        # https://github.com/ishandhanani/srt-slurm/blob/sa-submission-q1-2026/recipes/trtllm/gb200-fp4/8k1k/stp/ctx10_gen1_dep16_batch256_eplb256_mtp0.yaml
+        - "CONFIG_FILE=recipes/trtllm/gb200-fp4/8k1k/stp/ctx10_gen1_dep16_batch256_eplb256_mtp0.yaml"
       decode:
         num-worker: 1
-        tp: 8
-        ep: 8
+        tp: 16
+        ep: 16
         dp-attn: true
-        additional-settings:
-        - "DECODE_MAX_NUM_TOKENS=256"
-        - "DECODE_MAX_BATCH_SIZE=256"
-        - "DECODE_GPU_MEM_FRACTION=0.8"
-        - "DECODE_MTP_SIZE=0"
 
 dsr1-fp8-gb200-dynamo-sglang:
   image: lmsysorg/sglang:v0.5.5.post2
-  # model: deepseek-ai/DeepSeek-R1-0528
-  # Models are pre-downloaded to this path on GB200 runner to avoid repeated downloading
-  model: /mnt/lustre01/models/deepseek-r1-0528
+  model: deepseek-ai/DeepSeek-R1-0528
   model-prefix: dsr1
   runner: gb200
   precision: fp8
@@ -2291,11 +2413,8 @@ dsr1-fp8-gb200-dynamo-sglang:
         - "DECODE_NODES=8"
 
 dsr1-fp4-gb200-dynamo-sglang:
-  image: lmsysorg/sglang:v0.5.5.post2
-  # TODO: what is the right name?
-  # model: deepseek-ai/DeepSeek-R1-0528-fp4-v2
-  # Models are pre-downloaded to this path on GB200 runner to avoid repeated downloading
-  model: /mnt/lustre01/models/deepseek-r1-0528-fp4-v2
+  image: "lmsysorg/sglang:v0.5.5.post2"
+  model: nvidia/DeepSeek-R1-0528-NVFP4-v2
   model-prefix: dsr1
   runner: gb200
   precision: fp4
@@ -2710,4 +2829,4 @@ gptoss-fp4-gb200-dynamo-trt:
         - "DECODE_MAX_NUM_TOKENS=20000"
         - "DECODE_MAX_BATCH_SIZE=512"
         - "DECODE_GPU_MEM_FRACTION=0.9"
-        
\ No newline at end of file
+        
diff --git a/benchmarks/dsr1_fp4_gb200_dynamo-trt.sh b/benchmarks/dsr1_fp4_gb200_dynamo-trt.sh
deleted file mode 100644
index b7e4836ba..000000000
--- a/benchmarks/dsr1_fp4_gb200_dynamo-trt.sh
+++ /dev/null
@@ -1,79 +0,0 @@
-#!/usr/bin/bash
-
-set -x
-
-source "$(dirname "$0")/benchmark_lib.sh"
-
-check_env_vars \
-    CONC_LIST \
-    ISL \
-    OSL \
-    IMAGE \
-    SPEC_DECODING \
-    PREFILL_NUM_WORKERS \
-    PREFILL_TP \
-    PREFILL_EP \
-    PREFILL_DP_ATTN \
-    DECODE_NUM_WORKERS \
-    DECODE_TP \
-    DECODE_EP \
-    DECODE_DP_ATTN \
-    PREFILL_MAX_NUM_TOKENS \
-    PREFILL_MAX_BATCH_SIZE \
-    DECODE_MAX_NUM_TOKENS \
-    DECODE_MAX_BATCH_SIZE \
-    DECODE_GPU_MEM_FRACTION \
-    MODEL_PATH \
-    SERVED_MODEL_NAME
-
-if [ "$SPEC_DECODING" == "mtp" ]; then
-    check_env_vars DECODE_MTP_SIZE
-else
-    DECODE_MTP_SIZE="0"
-fi
-
-PERFORMANCE_SWEEPS_PATH="components/backends/trtllm/performance_sweeps"
-
-echo "Cloning Dynamo repository..."
-git clone https://github.com/ai-dynamo/dynamo.git
-cd dynamo
-git checkout release/0.5.1-rc0.20251105
-git submodule update --init --recursive
-
-cd "$PERFORMANCE_SWEEPS_PATH"
-
-# Set up environment variables based on ISL/OSL
-if [ "$ISL" = "1024" ] && [ "$OSL" = "1024" ]; then
-    export CACHE_TRANSCEIVER_MAX_NUM_TOKENS=4608
-elif [ "$ISL" = "8192" ] && [ "$OSL" = "1024" ]; then
-    export CACHE_TRANSCEIVER_MAX_NUM_TOKENS=8448
-else
-    echo "Unsupported ISL/OSL combination: $ISL/$OSL"
-    exit 1
-fi
-
-kind=dynamo_disagg
-additional_slurm_args="--time=04:00:00"
-ntasks_per_node=4
-
-gen_nodes=$(((DECODE_TP + 3)/4 * DECODE_NUM_WORKERS))
-total_nodes=$((PREFILL_NUM_WORKERS + gen_nodes))
-total_tasks=$((total_nodes * ntasks_per_node))
-
-decode_eplb_num_slots=0
-
-sbatch --nodes=${total_nodes} \
-    --ntasks=${total_tasks} \
-    --ntasks-per-node=${ntasks_per_node} \
-    --segment=${total_nodes} ${additional_slurm_args} \
-    benchmark_disagg.slurm \
-    ${PREFILL_NUM_WORKERS} ${PREFILL_TP} \
-    ${PREFILL_MAX_BATCH_SIZE} ${PREFILL_MAX_NUM_TOKENS} \
-    ${PREFILL_DP_ATTN} ${DECODE_NUM_WORKERS} \
-    ${DECODE_TP} ${DECODE_MAX_BATCH_SIZE} \
-    ${DECODE_MAX_NUM_TOKENS} ${DECODE_DP_ATTN} \
-    ${DECODE_GPU_MEM_FRACTION} ${decode_eplb_num_slots} \
-    ${DECODE_MTP_SIZE} "${CONC_LIST}" \
-    ${gen_nodes} ${kind} \
-    ${MODEL_PATH} ${SERVED_MODEL_NAME} \
-    ${IMAGE} ${ISL} ${OSL}
\ No newline at end of file
diff --git a/perf-changelog.yaml b/perf-changelog.yaml
index 168904456..6fbdb671c 100644
--- a/perf-changelog.yaml
+++ b/perf-changelog.yaml
@@ -309,3 +309,12 @@
     - "Includes MTP and STP configurations for 1k1k and 8k1k sequence lengths"
     - "Concurrency levels: 4, 8, 16, 32, 64, 128, 256, 512"
   pr-link: https://github.com/InferenceMAX/InferenceMAX/pull/570
+
+- config-keys:
+    - dsr1-fp4-gb200-dynamo-trt
+  description:
+    - "Update Dynamo TRT image from 0.5.1-rc0.pre3 to 0.8.1.post2"
+    - "Update TRT configurations"
+    - "Refactor configurations to use CONFIG_FILE-based recipes instead of inline parameter settings"
+    - "Introduce srt-slurm workflow for launching Dynamo jobs"
+  pr-link: https://github.com/InferenceMAX/InferenceMAX/pull/510
diff --git a/runners/launch_gb200-nv.sh b/runners/launch_gb200-nv.sh
index ed626e252..1944e04e0 100755
--- a/runners/launch_gb200-nv.sh
+++ b/runners/launch_gb200-nv.sh
@@ -4,116 +4,62 @@
 
 set -x
 
-# Set up environment variables for SLURM
-export SLURM_PARTITION="batch"
-export SLURM_ACCOUNT="benchmark"
-export SLURM_JOB_NAME="benchmark-dynamo.job"
-
-# For SGLang - we are working on updating the 8k1k configs 
-# For now we add conditionals to this script to use newer code for the 1k1k configs
-
-### FRAMEWORK_DIFF_IF_STATEMENT #1 - difference in setting up envvars
-SQUASH_FILE="/mnt/lustre01/users/sa-shared/images/$(echo "$IMAGE" | sed 's/[\/:@#]/_/g').sqsh"
-srun --partition=$SLURM_PARTITION --exclusive --time=180 bash -c "enroot import -o $SQUASH_FILE docker://$IMAGE"
-
-# Update the IMAGE variable to the squash file
-export IMAGE=$SQUASH_FILE
-
-# MODEL_PATH is set in `nvidia-master.yaml` or any other yaml files
-export MODEL_PATH=$MODEL
-
+# MODEL_PATH: Override with pre-downloaded paths on GB200 runner
+# The yaml files specify HuggingFace model IDs for portability, but we use
+# local paths to avoid repeated downloading on the shared GB200 cluster.
 if [[ $FRAMEWORK == "dynamo-sglang" ]]; then
     export CONFIG_DIR="/mnt/lustre01/artifacts/sglang-configs/1k1k"
-    export SGL_SLURM_JOBS_PATH="dynamo/examples/backends/sglang/slurm_jobs"
+    if [[ $MODEL_PREFIX == "dsr1" ]]; then
+        export MODEL_PATH="/mnt/lustre01/models/deepseek-r1-0528"
+    else
+        export MODEL_PATH=$MODEL
+    fi
 elif [[ $FRAMEWORK == "dynamo-trt" ]]; then
     if [[ $MODEL_PREFIX == "gptoss" ]]; then
         export MODEL_PATH="/mnt/lustre01/models/gpt-oss-120b"
         export SERVED_MODEL_NAME="gpt-oss-120b"
     elif [[ $MODEL_PREFIX == "dsr1" ]]; then
+        export MODEL_PATH="/mnt/lustre01/models/deepseek-r1-0528-fp4-v2/"
         export SERVED_MODEL_NAME="deepseek-r1-fp4"
     else
-        echo "Unsupported model prefix: $MODEL_PREFIX. Supported prefixes are: gptoss"
+        echo "Unsupported model prefix: $MODEL_PREFIX. Supported prefixes are: gptoss or dsr1"
         exit 1
     fi
+else
+    export MODEL_PATH=$MODEL
 fi
 
-export ISL="$ISL"
-export OSL="$OSL"
-
-bash benchmarks/"${EXP_NAME%%_*}_${PRECISION}_gb200_${FRAMEWORK}.sh"
-
-# Wait for all jobs to complete
-echo "Waiting for all jobs to complete..."
-while [ -n "$(squeue -u $USER --noheader --format='%i')" ]; do
-    echo "Jobs still running..."
-    squeue --steps -u $USER
-    sleep 30
-done
-
-# FIXME: The below is bad and is a result of the indirection of the ways in which
-# Dynamo jobs are launched. In a follow-up PR, the location of the result file should not
-# depend on the runner, it should always be in the same spot in the GH workspace.
-
-# Process results from all configurations
-if [[ $FRAMEWORK == "dynamo-trt" ]]; then
-
-    # Find the logs directory (should be only one for this ISL/OSL combination)
-    LOGS_DIR=$(find . -name "dynamo_disagg-bm-${ISL}-${OSL}" -type d | head -1)
-    if [ -z "$LOGS_DIR" ]; then
-        echo "No logs directory found for ISL=${ISL}, OSL=${OSL}"
-        exit 1
-    fi
-
-    echo "Found logs directory: $LOGS_DIR"
-
-    # Find all result subdirectories in this logs directory
-    RESULT_SUBDIRS=$(find "$LOGS_DIR" -name "ctx*_gen*_*_batch*_eplb*_mtp*" -type d)
-
-    if [ -z "$RESULT_SUBDIRS" ]; then
-        echo "No result subdirectories found in $LOGS_DIR"
-        exit 1
-    fi
-
-    echo "Found result subdirectories:"
-    echo "$RESULT_SUBDIRS"
-
-    # Process results from all configurations
-    for result_subdir in $RESULT_SUBDIRS; do
-        echo "Processing result subdirectory: $result_subdir"
+# Set up environment variables for SLURM
+export SLURM_PARTITION="batch"
+export SLURM_ACCOUNT="benchmark"
+export SLURM_JOB_NAME="benchmark-dynamo.job"
 
-        # Extract configuration info from directory name
-        CONFIG_NAME=$(basename "$result_subdir")
+NGINX_IMAGE="nginx:1.27.4"
 
-        # Process individual concurrency result files
-        RESULTS_SUBDIR="$result_subdir/results"
+SQUASH_FILE="/mnt/lustre01/users-public/sa-shared/$(echo "$IMAGE" | sed 's/[\/:@#]/_/g').sqsh"
+NGINX_SQUASH_FILE="/mnt/lustre01/users-public/sa-shared/$(echo "$NGINX_IMAGE" | sed 's/[\/:@#]/_/g').sqsh"
 
-        if [ -d "$RESULTS_SUBDIR" ]; then
-            echo "Processing results from: $RESULTS_SUBDIR"
+srun -N 1 -A $SLURM_ACCOUNT -p $SLURM_PARTITION bash -c "enroot import -o $SQUASH_FILE docker://$IMAGE"
+srun -N 1 -A $SLURM_ACCOUNT -p $SLURM_PARTITION bash -c "enroot import -o $NGINX_SQUASH_FILE docker://$NGINX_IMAGE"
 
-            # Find all concurrency result files with new format
-            CONCURRENCY_FILES=$(find "$RESULTS_SUBDIR" -name "results_concurrency_*_gpus_*.json")
 
-            for result_file in $CONCURRENCY_FILES; do
-                if [ -f "$result_file" ]; then
-                    # Extract concurrency and GPU count from filename
-                    filename=$(basename "$result_file")
-                    concurrency=$(echo "$filename" | sed 's/results_concurrency_\([0-9]*\)_gpus_.*\.json/\1/')
-                    gpus=$(echo "$filename" | sed 's/results_concurrency_.*_gpus_\([0-9]*\)\.json/\1/')
-                    echo "Processing concurrency $concurrency with $gpus GPUs: $result_file"
 
-                    # Copy the result file to workspace with a unique name
-                    WORKSPACE_RESULT_FILE="$GITHUB_WORKSPACE/${RESULT_FILENAME}_${CONFIG_NAME}_conc${concurrency}_gpus${gpus}.json"
-                    cp "$result_file" "$WORKSPACE_RESULT_FILE"
+export ISL="$ISL"
+export OSL="$OSL"
 
-                    echo "Copied result file to: $WORKSPACE_RESULT_FILE"
-                fi
-            done
-        else
-            echo "Results subdirectory not found: $RESULTS_SUBDIR"
-        fi
+if [[ $FRAMEWORK == "dynamo-sglang" ]]; then
+    export IMAGE=$SQUASH_FILE
+    export SGL_SLURM_JOBS_PATH="dynamo/examples/backends/sglang/slurm_jobs"
+    bash benchmarks/"${EXP_NAME%%_*}_${PRECISION}_gb200_${FRAMEWORK}.sh"
+    # Wait for all jobs to complete
+    echo "Waiting for all jobs to complete..."
+    while [ -n "$(squeue -u $USER --noheader --format='%i')" ]; do
+        echo "Jobs still running..."
+        squeue --steps -u $USER
+        sleep 30
     done
-else # search for "FRAMEWORK_DIFF_IF_STATEMENT #3" for this if-statement
-    # Find the latest log directory that contains the data
+
+    # Find the latest log directory that contains the data
     cat > collect_latest_results.py <<'PY'
 import os, sys
 sgl_job_dir, isl, osl, nexp = sys.argv[1], int(sys.argv[2]), int(sys.argv[3]), int(sys.argv[4])
@@ -141,6 +87,162 @@ PY
             cp $result_file $WORKSPACE_RESULT_FILE
         fi
     done
+
+    exit 0
+fi
+
+echo "Cloning srt-slurm repository..."
+SRT_REPO_DIR="srt-slurm"
+if [ -d "$SRT_REPO_DIR" ]; then
+    echo "Removing existing $SRT_REPO_DIR..."
+    rm -rf "$SRT_REPO_DIR"
 fi
 
-echo "All result files processed"
+git clone https://github.com/ishandhanani/srt-slurm.git "$SRT_REPO_DIR" || { echo "Error: Failed to clone srt-slurm"; exit 1; }
+cd "$SRT_REPO_DIR" || { echo "Error: Cannot enter $SRT_REPO_DIR"; exit 1; }  # later steps (uv venv, rm -rf .venv) must run inside the clone
+git checkout sa-submission-q1-2026 || { echo "Error: Failed to check out sa-submission-q1-2026"; exit 1; }
+
+echo "Installing srtctl..."
+curl -LsSf https://astral.sh/uv/install.sh | sh  # NOTE(review): pipes a remote installer straight into sh; consider pinning a version
+source "$HOME/.local/bin/env"
+
+uv venv
+source .venv/bin/activate
+uv pip install -e .
+
+if ! command -v srtctl &> /dev/null; then
+    echo "Error: Failed to install srtctl"
+    exit 1
+fi
+
+echo "Configs available at: $SRT_REPO_DIR/"
+
+# Create srtslurm.yaml for srtctl (used by both frameworks)
+SRTCTL_ROOT="${GITHUB_WORKSPACE}/srt-slurm"
+echo "Creating srtslurm.yaml configuration..."
+cat > srtslurm.yaml <<EOF
+# SRT SLURM Configuration for GB200
+
+# Default SLURM settings
+default_account: "${SLURM_ACCOUNT}"
+default_partition: "${SLURM_PARTITION}"
+default_time_limit: "6:00:00"
+
+# Resource defaults
+gpus_per_node: 4
+network_interface: ""
+
+# Path to srtctl repo root (where the configs live)
+srtctl_root: "${SRTCTL_ROOT}"
+
+# Model path aliases
+model_paths:
+  "${MODEL_PREFIX}": "${MODEL_PATH}"
+containers:
+  dynamo-trtllm: ${SQUASH_FILE}
+  dynamo-sglang: ${SQUASH_FILE}
+  nginx-sqsh: ${NGINX_SQUASH_FILE}
+EOF
+
+echo "Generated srtslurm.yaml:"
+cat srtslurm.yaml
+
+echo "Running make setup..."
+make setup ARCH=aarch64
+
+# The next two commands are for debugging only.
+# TODO: remove them before merging.
+echo "Make setup complete"
+ls configs/
+
+echo "Submitting job with srtctl..."
+if [[ "$FRAMEWORK" == "dynamo-sglang" ]]; then  # NOTE(review): the dynamo-sglang path appears to exit earlier in this script, so this branch looks unreachable; confirm
+    SRTCTL_OUTPUT=$(srtctl apply -f "$CONFIG_FILE" --tags "gb200,${MODEL_PREFIX},${PRECISION},${ISL}x${OSL},infmax-$(date +%Y%m%d)" --setup-script install-torchao.sh 2>&1)
+else
+    SRTCTL_OUTPUT=$(srtctl apply -f "$CONFIG_FILE" --tags "gb200,${MODEL_PREFIX},${PRECISION},${ISL}x${OSL},infmax-$(date +%Y%m%d)" 2>&1)
+fi
+echo "$SRTCTL_OUTPUT"
+
+JOB_ID=$( { echo "$SRTCTL_OUTPUT" | grep -oP '✅ Job \K[0-9]+' || echo "$SRTCTL_OUTPUT" | grep -oP 'Job \K[0-9]+'; } | head -n 1)  # keep only the first match so JOB_ID is a single ID
+
+if [ -z "$JOB_ID" ]; then
+    echo "Error: Failed to extract JOB_ID from srtctl output"
+    exit 1
+fi
+
+echo "Extracted JOB_ID: $JOB_ID"
+
+# Wait for this specific job to complete
+echo "Waiting for job $JOB_ID to complete..."
+while [ -n "$(squeue -j $JOB_ID --noheader 2>/dev/null)" ]; do
+    echo "Job $JOB_ID still running..."
+    squeue -j $JOB_ID
+    sleep 30
+done
+echo "Job $JOB_ID completed!"
+
+echo "Collecting results..."
+
+# Use the JOB_ID to find the logs directory
+# srtctl creates logs in outputs/JOB_ID/logs/
+LOGS_DIR="outputs/$JOB_ID/logs"
+
+if [ ! -d "$LOGS_DIR" ]; then
+    echo "Error: Logs directory not found at $LOGS_DIR"
+    exit 1
+fi
+
+echo "Found logs directory: $LOGS_DIR"
+
+cat "$LOGS_DIR/sweep_$JOB_ID.log"
+
+for file in "$LOGS_DIR"/*; do  # dump the tail of every regular file in the logs directory into the CI log
+    if [ -f "$file" ]; then
+        tail -n 500 "$file"
+    fi
+done
+
+# Find all result subdirectories
+RESULT_SUBDIRS=$(find "$LOGS_DIR" -maxdepth 1 -type d -name "*isl*osl*" 2>/dev/null)
+
+if [ -z "$RESULT_SUBDIRS" ]; then
+    echo "Warning: No result subdirectories found in $LOGS_DIR"
+else
+    # Process results from all configurations
+    for result_subdir in $RESULT_SUBDIRS; do
+        echo "Processing result subdirectory: $result_subdir"
+
+        # Extract configuration info from directory name
+        CONFIG_NAME=$(basename "$result_subdir")
+
+        # Find all result JSON files
+        RESULT_FILES=$(find "$result_subdir" -name "results_concurrency_*.json" 2>/dev/null)
+
+        for result_file in $RESULT_FILES; do
+            if [ -f "$result_file" ]; then
+                # Extract metadata from filename
+                # Files are of the format "results_concurrency_{concurrency}_gpus_{num gpus}_ctx_{num ctx}_gen_{num gen}.json"
+                filename=$(basename "$result_file")
+                concurrency=$(echo "$filename" | sed -n 's/results_concurrency_\([0-9]*\)_gpus_.*/\1/p')
+                gpus=$(echo "$filename" | sed -n 's/results_concurrency_[0-9]*_gpus_\([0-9]*\)_ctx_.*/\1/p')
+                ctx=$(echo "$filename" | sed -n 's/.*_ctx_\([0-9]*\)_gen_.*/\1/p')
+                gen=$(echo "$filename" | sed -n 's/.*_gen_\([0-9]*\)\.json/\1/p')
+
+                echo "Processing concurrency $concurrency with $gpus GPUs (ctx: $ctx, gen: $gen): $result_file"
+
+                WORKSPACE_RESULT_FILE="$GITHUB_WORKSPACE/${RESULT_FILENAME}_${CONFIG_NAME}_conc${concurrency}_gpus_${gpus}_ctx_${ctx}_gen_${gen}.json"
+                cp "$result_file" "$WORKSPACE_RESULT_FILE"
+
+                echo "Copied result file to: $WORKSPACE_RESULT_FILE"
+            fi
+        done
+    done
+fi
+
+# Cleanup
+echo "Cleaning up..."
+deactivate 2>/dev/null || true
+rm -rf .venv
+echo "Cleanup complete"
+
+echo "All result files processed"
\ No newline at end of file