diff --git a/examples/configs/recipes/llm/performance/grpo-llama3.1-8b-instruct-2n4g-async-1off.yaml b/examples/configs/recipes/llm/performance/grpo-llama3.1-8b-instruct-2n4g-async-1off.yaml new file mode 100644 index 0000000000..d906eda2b4 --- /dev/null +++ b/examples/configs/recipes/llm/performance/grpo-llama3.1-8b-instruct-2n4g-async-1off.yaml @@ -0,0 +1,32 @@ +defaults: ./grpo-llama3.1-8b-instruct-2n4g.yaml +grpo: + async_grpo: + enabled: true + max_trajectory_age_steps: 1 + in_flight_weight_updates: true +loss_fn: + use_importance_sampling_correction: true +checkpointing: + checkpoint_dir: results/grpo-llama3.1-8b-instruct-2n4g-async-1off +policy: + megatron_cfg: + tensor_model_parallel_size: 1 + pipeline_model_parallel_size: 1 + sequence_parallel: false + generation: + colocated: + enabled: false + resources: + num_nodes: 1 + gpus_per_node: 4 + vllm_cfg: + async_engine: true + tensor_parallel_size: 1 + gpu_memory_utilization: 0.8 +logger: + log_dir: logs/grpo-llama3.1-8b-instruct-2n4g-async-1off + wandb: + name: grpo-llama3.1-8b-instruct-2n4g-async-1off +cluster: + gpus_per_node: 4 + num_nodes: 2 diff --git a/examples/configs/recipes/llm/performance/grpo-llama3.1-8b-instruct-2n4g.yaml b/examples/configs/recipes/llm/performance/grpo-llama3.1-8b-instruct-2n4g.yaml new file mode 100644 index 0000000000..a99f7c1498 --- /dev/null +++ b/examples/configs/recipes/llm/performance/grpo-llama3.1-8b-instruct-2n4g.yaml @@ -0,0 +1,58 @@ +defaults: ../../../grpo_math_1B.yaml +grpo: + num_prompts_per_step: 64 + num_generations_per_prompt: 32 + max_num_steps: 500 +loss_fn: + use_importance_sampling_correction: true +checkpointing: + checkpoint_dir: results/grpo-llama3.1-8b-instruct-2n4g +policy: + model_name: meta-llama/Llama-3.1-8B-Instruct + tokenizer: + name: meta-llama/Llama-3.1-8B-Instruct + train_micro_batch_size: 1 + logprob_batch_size: 2 + max_total_sequence_length: 4096 + make_sequence_length_divisible_by: 1 + dtensor_cfg: + enabled: false + megatron_cfg: + enabled: 
true + empty_unused_memory_level: 1 + converter_type: LlamaForCausalLM + tensor_model_parallel_size: 1 + pipeline_model_parallel_size: 1 + sequence_parallel: false + activation_checkpointing: true + defer_fp32_logits: true + optimizer: + lr: 5.0e-07 + min_lr: 5.0e-08 + weight_decay: 0.0 + use_precision_aware_optimizer: true + scheduler: + lr_warmup_iters: 2 + lr_warmup_init: 5.0e-08 + fp8_cfg: + enabled: false + generation: + max_new_tokens: 4096 + stop_token_ids: + - 128009 + vllm_cfg: + max_model_len: 4096 + tensor_parallel_size: 1 +data: + max_input_seq_length: 4096 +logger: + log_dir: logs/grpo-llama3.1-8b-instruct-2n4g + wandb_enabled: true + tensorboard_enabled: true + wandb: + project: nemo-rl + name: grpo-llama3.1-8b-instruct-2n4g +cluster: + gpus_per_node: 4 + num_nodes: 2 + diff --git a/examples/configs/recipes/llm/performance/grpo-qwen3-30ba3b-4n4g.yaml b/examples/configs/recipes/llm/performance/grpo-qwen3-30ba3b-4n4g.yaml new file mode 100644 index 0000000000..21b9746f4b --- /dev/null +++ b/examples/configs/recipes/llm/performance/grpo-qwen3-30ba3b-4n4g.yaml @@ -0,0 +1,45 @@ +defaults: ../../../grpo_math_1B.yaml +grpo: + num_prompts_per_step: 64 + num_generations_per_prompt: 32 +checkpointing: + enabled: false + checkpoint_dir: results/grpo-qwen3-30ba3b-4n4g +policy: + model_name: Qwen/Qwen3-30B-A3B + train_micro_batch_size: 1 + max_total_sequence_length: 4096 + dtensor_cfg: + enabled: false + optimizer: null + scheduler: null + make_sequence_length_divisible_by: ${policy.megatron_cfg.tensor_model_parallel_size} + megatron_cfg: + enabled: true + empty_unused_memory_level: 1 + tensor_model_parallel_size: 1 + pipeline_model_parallel_size: 1 + expert_model_parallel_size: 16 + sequence_parallel: false + optimizer: + lr: 3.0e-07 + min_lr: 3.0e-08 + scheduler: + lr_warmup_iters: 50 + lr_warmup_init: 3.0e-08 + env_vars: + PYTORCH_CUDA_ALLOC_CONF: expandable_segments:False + generation: + vllm_cfg: + tensor_parallel_size: 1 +logger: + log_dir: 
logs/grpo-qwen3-30ba3b-4n4g + wandb_enabled: true + tensorboard_enabled: true + wandb: + project: nemo-rl + name: grpo-qwen3-30ba3b-4n4g +cluster: + gpus_per_node: 4 + num_nodes: 4 + diff --git a/examples/configs/recipes/llm/performance/grpo-qwen3-30ba3b-8n4g-async-1off.yaml b/examples/configs/recipes/llm/performance/grpo-qwen3-30ba3b-8n4g-async-1off.yaml new file mode 100644 index 0000000000..a9837c87f2 --- /dev/null +++ b/examples/configs/recipes/llm/performance/grpo-qwen3-30ba3b-8n4g-async-1off.yaml @@ -0,0 +1,33 @@ +defaults: ./grpo-qwen3-30ba3b-4n4g.yaml +grpo: + async_grpo: + enabled: true + max_trajectory_age_steps: 1 + in_flight_weight_updates: true +loss_fn: + use_importance_sampling_correction: true +checkpointing: + checkpoint_dir: results/grpo-qwen3-30ba3b-8n4g-async-1off +policy: + megatron_cfg: + tensor_model_parallel_size: 1 + pipeline_model_parallel_size: 1 + expert_model_parallel_size: 16 + sequence_parallel: false + generation: + colocated: + enabled: false + resources: + num_nodes: 4 + gpus_per_node: 4 + vllm_cfg: + async_engine: true + tensor_parallel_size: 1 + gpu_memory_utilization: 0.8 +logger: + log_dir: logs/grpo-qwen3-30ba3b-8n4g-async-1off + wandb: + name: grpo-qwen3-30ba3b-8n4g-async-1off +cluster: + gpus_per_node: 4 + num_nodes: 8 diff --git a/examples/configs/recipes/llm/performance/grpo-qwen3-32b-4n4g.yaml b/examples/configs/recipes/llm/performance/grpo-qwen3-32b-4n4g.yaml new file mode 100644 index 0000000000..2e441cdb5f --- /dev/null +++ b/examples/configs/recipes/llm/performance/grpo-qwen3-32b-4n4g.yaml @@ -0,0 +1,42 @@ +defaults: ../../../grpo_math_1B.yaml +grpo: + num_prompts_per_step: 64 + num_generations_per_prompt: 32 +checkpointing: + enabled: false + checkpoint_dir: results/grpo-qwen3-32b-4n4g +policy: + model_name: Qwen/Qwen3-32B + train_micro_batch_size: 1 + max_total_sequence_length: 4096 + dtensor_cfg: + enabled: false + optimizer: null + scheduler: null + make_sequence_length_divisible_by: 
${policy.megatron_cfg.tensor_model_parallel_size} + megatron_cfg: + enabled: true + empty_unused_memory_level: 1 + tensor_model_parallel_size: 2 + pipeline_model_parallel_size: 1 + sequence_parallel: true + optimizer: + lr: 3.0e-07 + min_lr: 3.0e-08 + scheduler: + lr_warmup_iters: 2 + lr_warmup_init: 3.0e-08 + generation: + vllm_cfg: + tensor_parallel_size: 1 +logger: + log_dir: logs/grpo-qwen3-32b-4n4g + wandb_enabled: true + tensorboard_enabled: true + wandb: + project: nemo-rl + name: grpo-qwen3-32b-4n4g +cluster: + gpus_per_node: 4 + num_nodes: 4 + diff --git a/examples/configs/recipes/llm/performance/grpo-qwen3-32b-8n4g-async-1off.yaml b/examples/configs/recipes/llm/performance/grpo-qwen3-32b-8n4g-async-1off.yaml new file mode 100644 index 0000000000..4f8a0a03bb --- /dev/null +++ b/examples/configs/recipes/llm/performance/grpo-qwen3-32b-8n4g-async-1off.yaml @@ -0,0 +1,33 @@ +defaults: ./grpo-qwen3-32b-4n4g.yaml +grpo: + async_grpo: + enabled: true + max_trajectory_age_steps: 1 + in_flight_weight_updates: true +loss_fn: + use_importance_sampling_correction: true +checkpointing: + checkpoint_dir: results/grpo-qwen3-32b-8n4g-async-1off +policy: + megatron_cfg: + tensor_model_parallel_size: 2 + pipeline_model_parallel_size: 1 + sequence_parallel: true + generation: + colocated: + enabled: false + resources: + num_nodes: 4 + gpus_per_node: 4 + vllm_cfg: + async_engine: true + tensor_parallel_size: 1 + gpu_memory_utilization: 0.8 +logger: + log_dir: logs/grpo-qwen3-32b-8n4g-async-1off + wandb: + name: grpo-qwen3-32b-8n4g-async-1off +cluster: + gpus_per_node: 4 + num_nodes: 8 + diff --git a/tests/test_suites/llm/performance/grpo-llama3.1-8b-instruct-2n4g-async-1off.sh b/tests/test_suites/llm/performance/grpo-llama3.1-8b-instruct-2n4g-async-1off.sh new file mode 100755 index 0000000000..e7636f3e93 --- /dev/null +++ b/tests/test_suites/llm/performance/grpo-llama3.1-8b-instruct-2n4g-async-1off.sh @@ -0,0 +1,39 @@ +#!/bin/bash +SCRIPT_DIR=$( cd -- "$( dirname -- 
"${BASH_SOURCE[0]}" )" &> /dev/null && pwd) +source $SCRIPT_DIR/common.env + +# ===== BEGIN CONFIG ===== +NUM_NODES=2 +STEPS_PER_RUN=10 +MAX_STEPS=10 +NUM_RUNS=$(( (MAX_STEPS + STEPS_PER_RUN - 1) / STEPS_PER_RUN )) # Round up +NUM_MINUTES=100 +# ===== END CONFIG ===== + +exit_if_max_steps_reached + +# Run the experiment +cd $PROJECT_ROOT +uv run examples/run_grpo_math.py \ + --config $CONFIG_PATH \ + grpo.max_num_steps=$MAX_STEPS \ + logger.log_dir=$LOG_DIR \ + logger.wandb_enabled=True \ + logger.wandb.project=nemo-rl \ + logger.wandb.name=$EXP_NAME \ + logger.monitor_gpus=True \ + logger.tensorboard_enabled=True \ + checkpointing.enabled=True \ + checkpointing.checkpoint_dir=$CKPT_DIR \ + $@ \ + 2>&1 | tee $RUN_LOG + +# Convert tensorboard logs to json +uv run tests/json_dump_tb_logs.py $LOG_DIR --output_path $JSON_METRICS + +# Only run metrics if the target step is reached +if [[ $(jq 'to_entries | .[] | select(.key == "train/loss") | .value | keys | map(tonumber) | max' $JSON_METRICS) -ge $MAX_STEPS ]]; then + uv run tests/check_metrics.py $JSON_METRICS \ + 'mean(data["train/token_mult_prob_error"]) < 1.1' \ + 'data["train/token_mult_prob_error"]["10"] < 1.1' +fi diff --git a/tests/test_suites/llm/performance/grpo-llama3.1-8b-instruct-2n4g.sh b/tests/test_suites/llm/performance/grpo-llama3.1-8b-instruct-2n4g.sh new file mode 100755 index 0000000000..e7636f3e93 --- /dev/null +++ b/tests/test_suites/llm/performance/grpo-llama3.1-8b-instruct-2n4g.sh @@ -0,0 +1,39 @@ +#!/bin/bash +SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd) +source $SCRIPT_DIR/common.env + +# ===== BEGIN CONFIG ===== +NUM_NODES=2 +STEPS_PER_RUN=10 +MAX_STEPS=10 +NUM_RUNS=$(( (MAX_STEPS + STEPS_PER_RUN - 1) / STEPS_PER_RUN )) # Round up +NUM_MINUTES=100 +# ===== END CONFIG ===== + +exit_if_max_steps_reached + +# Run the experiment +cd $PROJECT_ROOT +uv run examples/run_grpo_math.py \ + --config $CONFIG_PATH \ + grpo.max_num_steps=$MAX_STEPS \ + 
logger.log_dir=$LOG_DIR \ + logger.wandb_enabled=True \ + logger.wandb.project=nemo-rl \ + logger.wandb.name=$EXP_NAME \ + logger.monitor_gpus=True \ + logger.tensorboard_enabled=True \ + checkpointing.enabled=True \ + checkpointing.checkpoint_dir=$CKPT_DIR \ + $@ \ + 2>&1 | tee $RUN_LOG + +# Convert tensorboard logs to json +uv run tests/json_dump_tb_logs.py $LOG_DIR --output_path $JSON_METRICS + +# Only run metrics if the target step is reached +if [[ $(jq 'to_entries | .[] | select(.key == "train/loss") | .value | keys | map(tonumber) | max' $JSON_METRICS) -ge $MAX_STEPS ]]; then + uv run tests/check_metrics.py $JSON_METRICS \ + 'mean(data["train/token_mult_prob_error"]) < 1.1' \ + 'data["train/token_mult_prob_error"]["10"] < 1.1' +fi diff --git a/tests/test_suites/llm/performance/grpo-qwen3-30ba3b-4n4g.sh b/tests/test_suites/llm/performance/grpo-qwen3-30ba3b-4n4g.sh new file mode 100755 index 0000000000..2a56609ffd --- /dev/null +++ b/tests/test_suites/llm/performance/grpo-qwen3-30ba3b-4n4g.sh @@ -0,0 +1,40 @@ +#!/bin/bash +SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd) +source $SCRIPT_DIR/common.env + +# ===== BEGIN CONFIG ===== +NUM_NODES=4 +STEPS_PER_RUN=10 +MAX_STEPS=10 +NUM_RUNS=$(( (MAX_STEPS + STEPS_PER_RUN - 1) / STEPS_PER_RUN )) # Round up +NUM_MINUTES=100 +# ===== END CONFIG ===== + +exit_if_max_steps_reached + +# Run the experiment +cd $PROJECT_ROOT +uv run examples/run_grpo_math.py \ + --config $CONFIG_PATH \ + grpo.max_num_steps=$MAX_STEPS \ + logger.log_dir=$LOG_DIR \ + logger.wandb_enabled=True \ + logger.wandb.project=nemo-rl \ + logger.wandb.name=$EXP_NAME \ + logger.monitor_gpus=True \ + logger.tensorboard_enabled=True \ + checkpointing.enabled=True \ + checkpointing.checkpoint_dir=$CKPT_DIR \ + $@ \ + 2>&1 | tee $RUN_LOG + +# Convert tensorboard logs to json +uv run tests/json_dump_tb_logs.py $LOG_DIR --output_path $JSON_METRICS + +# Only run metrics if the target step is reached +if [[ $(jq 'to_entries | .[] | 
select(.key == "train/loss") | .value | keys | map(tonumber) | max' $JSON_METRICS) -ge $MAX_STEPS ]]; then + uv run tests/check_metrics.py $JSON_METRICS \ + 'mean(data["train/token_mult_prob_error"]) < 1.1' \ + 'data["train/token_mult_prob_error"]["10"] < 1.1' +fi + diff --git a/tests/test_suites/llm/performance/grpo-qwen3-30ba3b-8n4g-async-1off.sh b/tests/test_suites/llm/performance/grpo-qwen3-30ba3b-8n4g-async-1off.sh new file mode 100755 index 0000000000..8350d128e8 --- /dev/null +++ b/tests/test_suites/llm/performance/grpo-qwen3-30ba3b-8n4g-async-1off.sh @@ -0,0 +1,40 @@ +#!/bin/bash +SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd) +source $SCRIPT_DIR/common.env + +# ===== BEGIN CONFIG ===== +NUM_NODES=8 +STEPS_PER_RUN=10 +MAX_STEPS=10 +NUM_RUNS=$(( (MAX_STEPS + STEPS_PER_RUN - 1) / STEPS_PER_RUN )) # Round up +NUM_MINUTES=100 +# ===== END CONFIG ===== + +exit_if_max_steps_reached + +# Run the experiment +cd $PROJECT_ROOT +uv run examples/run_grpo_math.py \ + --config $CONFIG_PATH \ + grpo.max_num_steps=$MAX_STEPS \ + logger.log_dir=$LOG_DIR \ + logger.wandb_enabled=True \ + logger.wandb.project=nemo-rl \ + logger.wandb.name=$EXP_NAME \ + logger.monitor_gpus=True \ + logger.tensorboard_enabled=True \ + checkpointing.enabled=True \ + checkpointing.checkpoint_dir=$CKPT_DIR \ + $@ \ + 2>&1 | tee $RUN_LOG + +# Convert tensorboard logs to json +uv run tests/json_dump_tb_logs.py $LOG_DIR --output_path $JSON_METRICS + +# Only run metrics if the target step is reached +if [[ $(jq 'to_entries | .[] | select(.key == "train/loss") | .value | keys | map(tonumber) | max' $JSON_METRICS) -ge $MAX_STEPS ]]; then + uv run tests/check_metrics.py $JSON_METRICS \ + 'mean(data["train/token_mult_prob_error"]) < 1.1' \ + 'data["train/token_mult_prob_error"]["10"] < 1.1' +fi + diff --git a/tests/test_suites/llm/performance/grpo-qwen3-32b-4n4g.sh b/tests/test_suites/llm/performance/grpo-qwen3-32b-4n4g.sh new file mode 100755 index 0000000000..2a56609ffd 
--- /dev/null +++ b/tests/test_suites/llm/performance/grpo-qwen3-32b-4n4g.sh @@ -0,0 +1,40 @@ +#!/bin/bash +SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd) +source $SCRIPT_DIR/common.env + +# ===== BEGIN CONFIG ===== +NUM_NODES=4 +STEPS_PER_RUN=10 +MAX_STEPS=10 +NUM_RUNS=$(( (MAX_STEPS + STEPS_PER_RUN - 1) / STEPS_PER_RUN )) # Round up +NUM_MINUTES=100 +# ===== END CONFIG ===== + +exit_if_max_steps_reached + +# Run the experiment +cd $PROJECT_ROOT +uv run examples/run_grpo_math.py \ + --config $CONFIG_PATH \ + grpo.max_num_steps=$MAX_STEPS \ + logger.log_dir=$LOG_DIR \ + logger.wandb_enabled=True \ + logger.wandb.project=nemo-rl \ + logger.wandb.name=$EXP_NAME \ + logger.monitor_gpus=True \ + logger.tensorboard_enabled=True \ + checkpointing.enabled=True \ + checkpointing.checkpoint_dir=$CKPT_DIR \ + $@ \ + 2>&1 | tee $RUN_LOG + +# Convert tensorboard logs to json +uv run tests/json_dump_tb_logs.py $LOG_DIR --output_path $JSON_METRICS + +# Only run metrics if the target step is reached +if [[ $(jq 'to_entries | .[] | select(.key == "train/loss") | .value | keys | map(tonumber) | max' $JSON_METRICS) -ge $MAX_STEPS ]]; then + uv run tests/check_metrics.py $JSON_METRICS \ + 'mean(data["train/token_mult_prob_error"]) < 1.1' \ + 'data["train/token_mult_prob_error"]["10"] < 1.1' +fi + diff --git a/tests/test_suites/llm/performance/grpo-qwen3-32b-8n4g-async-1off.sh b/tests/test_suites/llm/performance/grpo-qwen3-32b-8n4g-async-1off.sh new file mode 100755 index 0000000000..35d58c98f7 --- /dev/null +++ b/tests/test_suites/llm/performance/grpo-qwen3-32b-8n4g-async-1off.sh @@ -0,0 +1,39 @@ +#!/bin/bash +SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd) +source $SCRIPT_DIR/common.env + +# ===== BEGIN CONFIG ===== +NUM_NODES=8 +STEPS_PER_RUN=10 +MAX_STEPS=10 +NUM_RUNS=$(( (MAX_STEPS + STEPS_PER_RUN - 1) / STEPS_PER_RUN )) # Round up +NUM_MINUTES=100 +# ===== END CONFIG ===== + +exit_if_max_steps_reached + +# Run the 
experiment +cd $PROJECT_ROOT +uv run examples/run_grpo_math.py \ + --config $CONFIG_PATH \ + grpo.max_num_steps=$MAX_STEPS \ + logger.log_dir=$LOG_DIR \ + logger.wandb_enabled=True \ + logger.wandb.project=nemo-rl \ + logger.wandb.name=$EXP_NAME \ + logger.monitor_gpus=True \ + logger.tensorboard_enabled=True \ + checkpointing.enabled=True \ + checkpointing.checkpoint_dir=$CKPT_DIR \ + $@ \ + 2>&1 | tee $RUN_LOG + +# Convert tensorboard logs to json +uv run tests/json_dump_tb_logs.py $LOG_DIR --output_path $JSON_METRICS + +# Only run metrics if the target step is reached +if [[ $(jq 'to_entries | .[] | select(.key == "train/loss") | .value | keys | map(tonumber) | max' $JSON_METRICS) -ge $MAX_STEPS ]]; then + uv run tests/check_metrics.py $JSON_METRICS \ + 'mean(data["train/token_mult_prob_error"]) < 1.1' \ + 'data["train/token_mult_prob_error"]["10"] < 1.1' +fi diff --git a/tests/test_suites/performance.txt b/tests/test_suites/performance.txt index e96ba7e110..2bc3e13efd 100644 --- a/tests/test_suites/performance.txt +++ b/tests/test_suites/performance.txt @@ -30,9 +30,15 @@ tests/test_suites/llm/performance/grpo-llama3.1-8b-instruct-2n8g-fp8-async-1off. # GB200 BF16 ## SYNC +tests/test_suites/llm/performance/grpo-llama3.1-8b-instruct-2n4g.sh +tests/test_suites/llm/performance/grpo-qwen3-32b-4n4g.sh +tests/test_suites/llm/performance/grpo-qwen3-30ba3b-4n4g.sh tests/test_suites/llm/performance/grpo-deepseek-v3-32n4g.sh tests/test_suites/llm/performance/grpo-qwen3-235b-16n4g.sh ## ASYNC 1-off +tests/test_suites/llm/performance/grpo-llama3.1-8b-instruct-2n4g-async-1off.sh +tests/test_suites/llm/performance/grpo-qwen3-32b-8n4g-async-1off.sh +tests/test_suites/llm/performance/grpo-qwen3-30ba3b-8n4g-async-1off.sh tests/test_suites/llm/performance/grpo-deepseek-v3-64n4g-async-1off.sh tests/test_suites/llm/performance/grpo-qwen3-235b-32n4g-async-1off.sh