@@ -16,8 +16,6 @@ policy:
   max_total_sequence_length: 8192
   dtensor_cfg:
     enabled: false
-  sequence_packing:
-    algorithm: modified_ffd
   make_sequence_length_divisible_by: ${policy.megatron_cfg.tensor_model_parallel_size}
   optimizer: null
   megatron_cfg:
@@ -45,7 +43,6 @@ policy:
       precision: fp8
       use_deep_gemm: true
       gpu_memory_utilization: 0.5
-      expert_parallel_size: 4
       quantization_ignored_layer_kws: [
         a_proj,
         b_proj
@@ -0,0 +1,21 @@
defaults: ./grpo-deepseek-v3-32n8g.yaml
checkpointing:
  checkpoint_dir: results/grpo-deepseek-v3-32n4g
policy:
  sequence_packing:
    enabled: false
  megatron_cfg:
    pipeline_model_parallel_size: 8
    expert_model_parallel_size: 16
    num_layers_in_first_pipeline_stage: 7
    num_layers_in_last_pipeline_stage: 6
  generation:
    vllm_cfg:
      tensor_parallel_size: 16
logger:
  log_dir: logs/grpo-deepseek-v3-32n4g
  wandb:
    name: grpo-deepseek-v3-32n4g
cluster:
  gpus_per_node: 4
  num_nodes: 32
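
For reference, the asymmetric first/last pipeline stages above are easy to sanity-check. A minimal sketch, assuming DeepSeek-V3's published depth of 61 transformer layers (this check is illustrative, not part of NeMo RL):

# With 61 layers and 8 pipeline stages, an even split is impossible, so the
# recipe trims the first and last stages (which also host the embedding and
# the LM head).
num_layers = 61                      # DeepSeek-V3 transformer layers
pp = 8                               # pipeline_model_parallel_size
first, last = 7, 6                   # from the config above
middle = num_layers - first - last   # 48 layers across the 6 middle stages
assert middle % (pp - 2) == 0
stages = [first] + [middle // (pp - 2)] * (pp - 2) + [last]
print(stages)                        # [7, 8, 8, 8, 8, 8, 8, 6]
assert sum(stages) == num_layers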
@@ -0,0 +1,25 @@
defaults: ./grpo-deepseek-v3-64n8g-async-1off.yaml
checkpointing:
  checkpoint_dir: results/grpo-deepseek-v3-64n4g-async-1off
policy:
  sequence_packing:
    enabled: false
  megatron_cfg:
    pipeline_model_parallel_size: 8
    expert_model_parallel_size: 16
    num_layers_in_first_pipeline_stage: 7
    num_layers_in_last_pipeline_stage: 6
  generation:
    colocated:
      resources:
        gpus_per_node: 4
    vllm_cfg:
      tensor_parallel_size: 16
      gpu_memory_utilization: 0.8
logger:
  log_dir: logs/grpo-deepseek-v3-64n4g-async-32T32G-1off
  wandb:
    name: grpo-deepseek-v3-64n4g-async-32T32G-1off
Comment on lines +20 to +22
⚠️ Potential issue | 🟡 Minor

Inconsistent naming: log paths include "32T32G" suffix not present elsewhere.

The log_dir and wandb.name use grpo-deepseek-v3-64n4g-async-32T32G-1off, but the checkpoint directory (line 3) and filename use grpo-deepseek-v3-64n4g-async-1off. The "32T32G" suffix appears only in logging paths, which may cause confusion when correlating checkpoints with logs.

🔎 Suggested fix for consistent naming
 logger:
-  log_dir: logs/grpo-deepseek-v3-64n4g-async-32T32G-1off
+  log_dir: logs/grpo-deepseek-v3-64n4g-async-1off
   wandb:
-    name: grpo-deepseek-v3-64n4g-async-32T32G-1off
+    name: grpo-deepseek-v3-64n4g-async-1off
🤖 Prompt for AI Agents
In examples/configs/recipes/llm/performance/grpo-deepseek-v3-64n4g-async-1off.yaml around lines 20–22, the log_dir and wandb.name include a "32T32G" suffix that is inconsistent with the checkpoint directory/name on line 3; update log_dir and wandb.name to match the checkpoint naming (remove "32T32G" so both use grpo-deepseek-v3-64n4g-async-1off) to ensure logs and checkpoints correlate, or alternatively add the same "32T32G" suffix to the checkpoint entries if that was the intended canonical name; choose one naming source and make all three entries identical.

cluster:
  gpus_per_node: 4
  num_nodes: 64
@@ -0,0 +1,28 @@
defaults: ./grpo-deepseek-v3-64n8g-async-1off.yaml
checkpointing:
  checkpoint_dir: results/grpo-deepseek-v3-64n8g-fp8-async-1off
policy:
  megatron_cfg:
    fp8_cfg:
      enabled: true
      fp8: "e4m3"
      fp8_recipe: "blockwise"
      fp8_param: false
    moe_router_dtype: fp32
    env_vars:
      NVTE_FP8_BLOCK_SCALING_FP32_SCALES: "1"
  generation:
    vllm_cfg:
      tensor_parallel_size: 16
      precision: "fp8"
      use_deep_gemm: true
      quantization_ignored_layer_kws: [
        a_proj,
        b_proj
      ]
      vllm_kwargs:
        max_num_seqs: 32
logger:
  log_dir: logs/grpo-deepseek-v3-64n8g-fp8-async-1off
  wandb:
    name: grpo-deepseek-v3-64n8g-fp8-async-1off
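
For readers unfamiliar with the blockwise recipe: fp8_recipe: "blockwise" together with NVTE_FP8_BLOCK_SCALING_FP32_SCALES=1 keeps one FP32 scale per weight tile rather than one per tensor. A toy NumPy sketch of the bookkeeping (hypothetical helper, not NeMo RL or Transformer Engine code; the 128x128 tile size is an assumption based on the common convention, and real kernels additionally round values to the e4m3 grid in hardware):

import numpy as np

E4M3_MAX = 448.0  # largest finite float8 e4m3 value

def blockwise_scales(w: np.ndarray, block: int = 128) -> np.ndarray:
    """One FP32 scale per (block x block) tile, chosen so the tile's
    absolute max lands at the top of the e4m3 range."""
    h, wd = w.shape
    scales = np.zeros((h // block, wd // block), dtype=np.float32)
    for i in range(0, h, block):
        for j in range(0, wd, block):
            amax = np.abs(w[i:i + block, j:j + block]).max()
            scales[i // block, j // block] = max(amax, 1e-12) / E4M3_MAX
    return scales

w = np.random.randn(256, 256).astype(np.float32)
print(blockwise_scales(w).shape)  # (2, 2): four tiles, four FP32 scales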
@@ -0,0 +1,32 @@
defaults: ./grpo-llama3.1-8b-instruct-2n4g.yaml
grpo:
  async_grpo:
    enabled: true
    max_trajectory_age_steps: 1
    in_flight_weight_updates: true
loss_fn:
  use_importance_sampling_correction: true
checkpointing:
  checkpoint_dir: results/grpo-llama3.1-8b-instruct-2n4g-async-1off
policy:
  megatron_cfg:
    tensor_model_parallel_size: 1
    pipeline_model_parallel_size: 1
    sequence_parallel: false
  generation:
    colocated:
      enabled: false
      resources:
        num_nodes: 1
        gpus_per_node: 4
    vllm_cfg:
      async_engine: true
      tensor_parallel_size: 1
      gpu_memory_utilization: 0.8
logger:
  log_dir: logs/grpo-llama3.1-8b-instruct-2n4g-async-1off
  wandb:
    name: grpo-llama3.1-8b-instruct-2n4g-async-1off
cluster:
  gpus_per_node: 4
  num_nodes: 2
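
The combination above is what the "1off" suffix refers to: generation may run one step behind the trainer (max_trajectory_age_steps: 1), so use_importance_sampling_correction reweights the loss by the probability ratio between the current policy and the stale behavior policy that produced the rollouts. A minimal sketch of the general technique (the token-level form and the clipping threshold are illustrative assumptions, not NeMo RL's exact loss):

import torch

def is_corrected_pg_loss(logp_new: torch.Tensor,
                         logp_behavior: torch.Tensor,
                         advantages: torch.Tensor,
                         max_ratio: float = 2.0) -> torch.Tensor:
    # pi_new / pi_behavior per token; > 1 where the current policy now
    # prefers the sampled token more than the stale policy did
    ratio = torch.exp(logp_new - logp_behavior)
    # truncate the ratio to bound the variance off-policy samples introduce
    ratio = torch.clamp(ratio, max=max_ratio)
    return -(ratio * advantages).mean()

loss = is_corrected_pg_loss(torch.randn(8), torch.randn(8), torch.randn(8))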
@@ -0,0 +1,58 @@
defaults: ../../../grpo_math_1B.yaml
grpo:
  num_prompts_per_step: 64
  num_generations_per_prompt: 32
  max_num_steps: 500
loss_fn:
  use_importance_sampling_correction: true
checkpointing:
  checkpoint_dir: results/grpo-llama3.1-8b-instruct-2n4g
policy:
  model_name: meta-llama/Llama-3.1-8B-Instruct
  tokenizer:
    name: meta-llama/Llama-3.1-8B-Instruct
  train_micro_batch_size: 1
  logprob_batch_size: 2
  max_total_sequence_length: 4096
  make_sequence_length_divisible_by: 1
  dtensor_cfg:
    enabled: false
  megatron_cfg:
    enabled: true
    empty_unused_memory_level: 1
    converter_type: LlamaForCausalLM
    tensor_model_parallel_size: 1
    pipeline_model_parallel_size: 1
    sequence_parallel: false
    activation_checkpointing: true
    defer_fp32_logits: true
    optimizer:
      lr: 5.0e-07
      min_lr: 5.0e-08
      weight_decay: 0.0
      use_precision_aware_optimizer: true
    scheduler:
      lr_warmup_iters: 2
      lr_warmup_init: 5.0e-08
    fp8_cfg:
      enabled: false
  generation:
    max_new_tokens: 4096
    stop_token_ids:
      - 128009
    vllm_cfg:
      max_model_len: 4096
      tensor_parallel_size: 1
data:
  max_input_seq_length: 4096
logger:
  log_dir: logs/grpo-llama3.1-8b-instruct-2n4g
  wandb_enabled: true
  tensorboard_enabled: true
  wandb:
    project: nemo-rl
    name: grpo-llama3.1-8b-instruct-2n4g
cluster:
  gpus_per_node: 4
  num_nodes: 2
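
The rollout volume this base recipe implies per optimizer step is plain arithmetic, shown below; and since tensor and pipeline parallelism are both 1, every GPU on the 2-node x 4-GPU cluster acts as a data-parallel rank:

num_prompts_per_step = 64
num_generations_per_prompt = 32
rollouts = num_prompts_per_step * num_generations_per_prompt  # 2048 per step

dp_ranks = 2 * 4              # tp=1, pp=1, so all 8 GPUs are DP ranks
print(rollouts, rollouts // dp_ranks)  # 2048 total, 256 sequences per rank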

@@ -9,6 +9,8 @@ loss_fn:
 checkpointing:
   checkpoint_dir: results/grpo-llama3.1-8b-instruct-2n8g-async-1off
 policy:
+  megatron_cfg:
+    pipeline_model_parallel_size: 1
   generation:
     colocated:
       enabled: false
@@ -0,0 +1,20 @@
defaults: ./grpo-llama3.1-8b-instruct-2n8g-async-1off.yaml
checkpointing:
  checkpoint_dir: results/grpo-llama3.1-8b-instruct-2n8g-fp8-async-1off
policy:
  megatron_cfg:
    fp8_cfg:
      enabled: true
      fp8: "e4m3"
      fp8_recipe: "blockwise"
      fp8_param: false
    env_vars:
      NVTE_FP8_BLOCK_SCALING_FP32_SCALES: "1"
  generation:
    vllm_cfg:
      precision: "fp8"
      use_deep_gemm: true
logger:
  log_dir: logs/grpo-llama3.1-8b-instruct-2n8g-fp8-async-1off
  wandb:
    name: grpo-llama3.1-8b-instruct-2n8g-fp8-async-1off
@@ -0,0 +1,18 @@
defaults: ./grpo-qwen3-235b-16n8g.yaml
checkpointing:
  checkpoint_dir: results/grpo-qwen3-235b-16n4g
policy:
  megatron_cfg:
    pipeline_model_parallel_size: 4
    num_layers_in_first_pipeline_stage: 23
    num_layers_in_last_pipeline_stage: 23
  generation:
    vllm_cfg:
      tensor_parallel_size: 8
logger:
  log_dir: logs/grpo-qwen3-235b-16n4g
  wandb:
    name: grpo-qwen3-235b-16n4g
cluster:
  gpus_per_node: 4
  num_nodes: 16
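
Same pipeline arithmetic as the DeepSeek recipes, assuming Qwen3-235B-A22B's 94 transformer layers: a 4-stage split cannot be even, so the outer stages take 23 layers each and the middle ones take 24.

num_layers, pp = 94, 4               # Qwen3-235B-A22B depth, pipeline stages
first = last = 23                    # from the config above
middle = num_layers - first - last   # 48 layers over the 2 middle stages
print([first, middle // 2, middle // 2, last])   # [23, 24, 24, 23]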
@@ -0,0 +1,21 @@
defaults: ./grpo-qwen3-235b-32n8g-async-1off.yaml
checkpointing:
  checkpoint_dir: results/grpo-qwen3-235b-32n4g-async-1off
policy:
  megatron_cfg:
    pipeline_model_parallel_size: 4
    num_layers_in_first_pipeline_stage: 23
    num_layers_in_last_pipeline_stage: 23
  generation:
    colocated:
      resources:
        gpus_per_node: 4
    vllm_cfg:
      tensor_parallel_size: 8
logger:
  log_dir: logs/grpo-qwen3-235b-32n4g-async-1off
  wandb:
    name: grpo-qwen3-235b-32n4g-async-1off
cluster:
  gpus_per_node: 4
  num_nodes: 32
@@ -0,0 +1,45 @@
defaults: ../../../grpo_math_1B.yaml
grpo:
  num_prompts_per_step: 64
  num_generations_per_prompt: 32
checkpointing:
  enabled: false
  checkpoint_dir: results/grpo-qwen3-30ba3b-4n4g
policy:
  model_name: Qwen/Qwen3-30B-A3B
  train_micro_batch_size: 1
  max_total_sequence_length: 4096
  dtensor_cfg:
    enabled: false
  optimizer: null
  scheduler: null
  make_sequence_length_divisible_by: ${policy.megatron_cfg.tensor_model_parallel_size}
  megatron_cfg:
    enabled: true
    empty_unused_memory_level: 1
    tensor_model_parallel_size: 1
    pipeline_model_parallel_size: 1
    expert_model_parallel_size: 16
    sequence_parallel: false
    optimizer:
      lr: 3.0e-07
      min_lr: 3.0e-08
    scheduler:
      lr_warmup_iters: 50
      lr_warmup_init: 3.0e-08
    env_vars:
      PYTORCH_CUDA_ALLOC_CONF: expandable_segments:False
  generation:
    vllm_cfg:
      tensor_parallel_size: 1
logger:
  log_dir: logs/grpo-qwen3-30ba3b-4n4g
  wandb_enabled: true
  tensorboard_enabled: true
  wandb:
    project: nemo-rl
    name: grpo-qwen3-30ba3b-4n4g
cluster:
  gpus_per_node: 4
  num_nodes: 4
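
One way to read expert_model_parallel_size: 16 here, assuming Qwen3-30B-A3B's 128 routed experts: the EP group spans all 16 GPUs of this 4-node x 4-GPU cluster, leaving 8 experts resident on each rank.

num_experts = 128            # Qwen3-30B-A3B routed experts (assumption above)
ep = 16                      # expert_model_parallel_size
world = 4 * 4                # cluster: 4 nodes x 4 GPUs
assert num_experts % ep == 0 and world % ep == 0
print(num_experts // ep)     # 8 experts resident per GPU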

@@ -0,0 +1,33 @@
defaults: ./grpo-qwen3-30ba3b-4n4g.yaml
grpo:
  async_grpo:
    enabled: true
    max_trajectory_age_steps: 1
    in_flight_weight_updates: true
loss_fn:
  use_importance_sampling_correction: true
checkpointing:
  checkpoint_dir: results/grpo-qwen3-30ba3b-8n4g-async-1off
policy:
  megatron_cfg:
    tensor_model_parallel_size: 1
    pipeline_model_parallel_size: 1
    expert_model_parallel_size: 16
    sequence_parallel: false
  generation:
    colocated:
      enabled: false
      resources:
        num_nodes: 4
        gpus_per_node: 4
    vllm_cfg:
      async_engine: true
      tensor_parallel_size: 1
      gpu_memory_utilization: 0.8
logger:
  log_dir: logs/grpo-qwen3-30ba3b-8n4g-async-1off
  wandb:
    name: grpo-qwen3-30ba3b-8n4g-async-1off
cluster:
  gpus_per_node: 4
  num_nodes: 8
@@ -0,0 +1,42 @@
defaults: ../../../grpo_math_1B.yaml
grpo:
  num_prompts_per_step: 64
  num_generations_per_prompt: 32
checkpointing:
  enabled: false
  checkpoint_dir: results/grpo-qwen3-32b-4n4g
policy:
  model_name: Qwen/Qwen3-32B
  train_micro_batch_size: 1
  max_total_sequence_length: 4096
  dtensor_cfg:
    enabled: false
  optimizer: null
  scheduler: null
  make_sequence_length_divisible_by: ${policy.megatron_cfg.tensor_model_parallel_size}
  megatron_cfg:
    enabled: true
    empty_unused_memory_level: 1
    tensor_model_parallel_size: 2
    pipeline_model_parallel_size: 1
    sequence_parallel: true
    optimizer:
      lr: 3.0e-07
      min_lr: 3.0e-08
    scheduler:
      lr_warmup_iters: 2
      lr_warmup_init: 3.0e-08
  generation:
    vllm_cfg:
      tensor_parallel_size: 1
logger:
  log_dir: logs/grpo-qwen3-32b-4n4g
  wandb_enabled: true
  tensorboard_enabled: true
  wandb:
    project: nemo-rl
    name: grpo-qwen3-32b-4n4g
cluster:
  gpus_per_node: 4
  num_nodes: 4
