From 40353d5d92414e532939e2b3c07977fc1b426137 Mon Sep 17 00:00:00 2001
From: Youngeun Kwon <youngeunk@nvidia.com>
Date: Fri, 14 Nov 2025 14:15:00 -0800
Subject: [PATCH 1/3] Set Megatron TP from 2 to 1 and disable sequence parallel

Signed-off-by: Youngeun Kwon <youngeunk@nvidia.com>
---
 .../llm/performance/grpo-qwen3-30ba3b-4n8g-async-1off.yaml    | 4 ++--
 .../recipes/llm/performance/grpo-qwen3-30ba3b-4n8g.yaml       | 4 ++--
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/examples/configs/recipes/llm/performance/grpo-qwen3-30ba3b-4n8g-async-1off.yaml b/examples/configs/recipes/llm/performance/grpo-qwen3-30ba3b-4n8g-async-1off.yaml
index 02fb889e82..f6413ea975 100644
--- a/examples/configs/recipes/llm/performance/grpo-qwen3-30ba3b-4n8g-async-1off.yaml
+++ b/examples/configs/recipes/llm/performance/grpo-qwen3-30ba3b-4n8g-async-1off.yaml
@@ -10,10 +10,10 @@ checkpointing:
   checkpoint_dir: results/grpo-qwen3-30ba3b-4n8g-async-1off
 policy:
   megatron_cfg:
-    tensor_model_parallel_size: 2
+    tensor_model_parallel_size: 1
     pipeline_model_parallel_size: 1
     expert_model_parallel_size: 8
-    sequence_parallel: true
+    sequence_parallel: false
   generation:
     colocated:
       enabled: false
diff --git a/examples/configs/recipes/llm/performance/grpo-qwen3-30ba3b-4n8g.yaml b/examples/configs/recipes/llm/performance/grpo-qwen3-30ba3b-4n8g.yaml
index 4ed5c404ab..053c703eee 100644
--- a/examples/configs/recipes/llm/performance/grpo-qwen3-30ba3b-4n8g.yaml
+++ b/examples/configs/recipes/llm/performance/grpo-qwen3-30ba3b-4n8g.yaml
@@ -17,10 +17,10 @@ policy:
   megatron_cfg:
     enabled: true
     empty_unused_memory_level: 1
-    tensor_model_parallel_size: 2
+    tensor_model_parallel_size: 1
     pipeline_model_parallel_size: 1
     expert_model_parallel_size: 8
-    sequence_parallel: true
+    sequence_parallel: false
     optimizer:
       lr: 3.0e-07
       min_lr: 3.0e-08

From a8eed4170a0b57f99d00666cda357cf17d4b0e7c Mon Sep 17 00:00:00 2001
From: Youngeun Kwon <youngeunk@nvidia.com>
Date: Fri, 14 Nov 2025 14:37:24 -0800
Subject: [PATCH 2/3] Set logprob_batch_size to 2 in async GRPO recipe

Signed-off-by: Youngeun Kwon <youngeunk@nvidia.com>
---
 .../llm/performance/grpo-qwen3-30ba3b-4n8g-async-1off.yaml       | 1 +
 1 file changed, 1 insertion(+)

diff --git a/examples/configs/recipes/llm/performance/grpo-qwen3-30ba3b-4n8g-async-1off.yaml b/examples/configs/recipes/llm/performance/grpo-qwen3-30ba3b-4n8g-async-1off.yaml
index f6413ea975..0cfc018263 100644
--- a/examples/configs/recipes/llm/performance/grpo-qwen3-30ba3b-4n8g-async-1off.yaml
+++ b/examples/configs/recipes/llm/performance/grpo-qwen3-30ba3b-4n8g-async-1off.yaml
@@ -9,6 +9,7 @@ loss_fn:
 checkpointing:
   checkpoint_dir: results/grpo-qwen3-30ba3b-4n8g-async-1off
 policy:
+  logprob_batch_size: 2
   megatron_cfg:
     tensor_model_parallel_size: 1
     pipeline_model_parallel_size: 1

From af8250e3168f1daf8f994e1e985347dee5535f29 Mon Sep 17 00:00:00 2001
From: Youngeun Kwon <youngeunk@nvidia.com>
Date: Fri, 14 Nov 2025 17:02:00 -0800
Subject: [PATCH 3/3] Async recipe: use PP 2 and vLLM TP 2, drop logprob_batch_size

Signed-off-by: Youngeun Kwon <youngeunk@nvidia.com>
---
 .../llm/performance/grpo-qwen3-30ba3b-4n8g-async-1off.yaml   | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/examples/configs/recipes/llm/performance/grpo-qwen3-30ba3b-4n8g-async-1off.yaml b/examples/configs/recipes/llm/performance/grpo-qwen3-30ba3b-4n8g-async-1off.yaml
index 0cfc018263..4cc5981460 100644
--- a/examples/configs/recipes/llm/performance/grpo-qwen3-30ba3b-4n8g-async-1off.yaml
+++ b/examples/configs/recipes/llm/performance/grpo-qwen3-30ba3b-4n8g-async-1off.yaml
@@ -9,10 +9,9 @@ loss_fn:
 checkpointing:
   checkpoint_dir: results/grpo-qwen3-30ba3b-4n8g-async-1off
 policy:
-  logprob_batch_size: 2
   megatron_cfg:
     tensor_model_parallel_size: 1
-    pipeline_model_parallel_size: 1
+    pipeline_model_parallel_size: 2
     expert_model_parallel_size: 8
     sequence_parallel: false
   generation:
@@ -23,7 +22,7 @@ policy:
         gpus_per_node: 8
     vllm_cfg:
       async_engine: true
-      tensor_parallel_size: 4
+      tensor_parallel_size: 2
       gpu_memory_utilization: 0.8
 logger:
   log_dir: logs/grpo-qwen3-30ba3b-4n8g-2T2G-async-1off