From 1cdf5aa3e79b682e1314369820d7cab8286880ef Mon Sep 17 00:00:00 2001 From: Seonjin Na Date: Thu, 18 Dec 2025 16:53:15 -0800 Subject: [PATCH 1/3] feat: Add GB200 perf recipes for llama3-8b, qwen3-30ba3,qwen3-32b --- .../grpo-llama3.1-8b-instruct-2n4g.yaml | 58 +++++++++++++++++++ .../performance/grpo-qwen3-30ba3b-4n4g.yaml | 45 ++++++++++++++ .../llm/performance/grpo-qwen3-32b-4n4g.yaml | 42 ++++++++++++++ .../grpo-llama3.1-8b-instruct-2n4g.sh | 39 +++++++++++++ .../llm/performance/grpo-qwen3-30ba3b-4n4g.sh | 40 +++++++++++++ .../llm/performance/grpo-qwen3-32b-4n4g.sh | 40 +++++++++++++ tests/test_suites/performance.txt | 11 +++- 7 files changed, 274 insertions(+), 1 deletion(-) create mode 100644 examples/configs/recipes/llm/performance/grpo-llama3.1-8b-instruct-2n4g.yaml create mode 100644 examples/configs/recipes/llm/performance/grpo-qwen3-30ba3b-4n4g.yaml create mode 100644 examples/configs/recipes/llm/performance/grpo-qwen3-32b-4n4g.yaml create mode 100755 tests/test_suites/llm/performance/grpo-llama3.1-8b-instruct-2n4g.sh create mode 100755 tests/test_suites/llm/performance/grpo-qwen3-30ba3b-4n4g.sh create mode 100755 tests/test_suites/llm/performance/grpo-qwen3-32b-4n4g.sh diff --git a/examples/configs/recipes/llm/performance/grpo-llama3.1-8b-instruct-2n4g.yaml b/examples/configs/recipes/llm/performance/grpo-llama3.1-8b-instruct-2n4g.yaml new file mode 100644 index 0000000000..a99f7c1498 --- /dev/null +++ b/examples/configs/recipes/llm/performance/grpo-llama3.1-8b-instruct-2n4g.yaml @@ -0,0 +1,58 @@ +defaults: ../../../grpo_math_1B.yaml +grpo: + num_prompts_per_step: 64 + num_generations_per_prompt: 32 + max_num_steps: 500 +loss_fn: + use_importance_sampling_correction: true +checkpointing: + checkpoint_dir: results/grpo-llama3.1-8b-instruct-2n4g +policy: + model_name: meta-llama/Llama-3.1-8B-Instruct + tokenizer: + name: meta-llama/Llama-3.1-8B-Instruct + train_micro_batch_size: 1 + logprob_batch_size: 2 + max_total_sequence_length: 4096 + 
make_sequence_length_divisible_by: 1 + dtensor_cfg: + enabled: false + megatron_cfg: + enabled: true + empty_unused_memory_level: 1 + converter_type: LlamaForCausalLM + tensor_model_parallel_size: 1 + pipeline_model_parallel_size: 1 + sequence_parallel: false + activation_checkpointing: true + defer_fp32_logits: true + optimizer: + lr: 5.0e-07 + min_lr: 5.0e-08 + weight_decay: 0.0 + use_precision_aware_optimizer: true + scheduler: + lr_warmup_iters: 2 + lr_warmup_init: 5.0e-08 + fp8_cfg: + enabled: false + generation: + max_new_tokens: 4096 + stop_token_ids: + - 128009 + vllm_cfg: + max_model_len: 4096 + tensor_parallel_size: 1 +data: + max_input_seq_length: 4096 +logger: + log_dir: logs/grpo-llama3.1-8b-instruct-2n4g + wandb_enabled: true + tensorboard_enabled: true + wandb: + project: nemo-rl + name: grpo-llama3.1-8b-instruct-2n4g +cluster: + gpus_per_node: 4 + num_nodes: 2 + diff --git a/examples/configs/recipes/llm/performance/grpo-qwen3-30ba3b-4n4g.yaml b/examples/configs/recipes/llm/performance/grpo-qwen3-30ba3b-4n4g.yaml new file mode 100644 index 0000000000..21b9746f4b --- /dev/null +++ b/examples/configs/recipes/llm/performance/grpo-qwen3-30ba3b-4n4g.yaml @@ -0,0 +1,45 @@ +defaults: ../../../grpo_math_1B.yaml +grpo: + num_prompts_per_step: 64 + num_generations_per_prompt: 32 +checkpointing: + enabled: false + checkpoint_dir: results/grpo-qwen3-30ba3b-4n4g +policy: + model_name: Qwen/Qwen3-30B-A3B + train_micro_batch_size: 1 + max_total_sequence_length: 4096 + dtensor_cfg: + enabled: false + optimizer: null + scheduler: null + make_sequence_length_divisible_by: ${policy.megatron_cfg.tensor_model_parallel_size} + megatron_cfg: + enabled: true + empty_unused_memory_level: 1 + tensor_model_parallel_size: 1 + pipeline_model_parallel_size: 1 + expert_model_parallel_size: 16 + sequence_parallel: false + optimizer: + lr: 3.0e-07 + min_lr: 3.0e-08 + scheduler: + lr_warmup_iters: 50 + lr_warmup_init: 3.0e-08 + env_vars: + PYTORCH_CUDA_ALLOC_CONF: 
expandable_segments:False + generation: + vllm_cfg: + tensor_parallel_size: 1 +logger: + log_dir: logs/grpo-qwen3-30ba3b-4n4g + wandb_enabled: true + tensorboard_enabled: true + wandb: + project: nemo-rl + name: grpo-qwen3-30ba3b-4n4g +cluster: + gpus_per_node: 4 + num_nodes: 4 + diff --git a/examples/configs/recipes/llm/performance/grpo-qwen3-32b-4n4g.yaml b/examples/configs/recipes/llm/performance/grpo-qwen3-32b-4n4g.yaml new file mode 100644 index 0000000000..9b98877b18 --- /dev/null +++ b/examples/configs/recipes/llm/performance/grpo-qwen3-32b-4n4g.yaml @@ -0,0 +1,42 @@ +defaults: ../../../grpo_math_1B.yaml +grpo: + num_prompts_per_step: 64 + num_generations_per_prompt: 32 +checkpointing: + enabled: false + checkpoint_dir: results/grpo-qwen3-32b-4n4g +policy: + model_name: Qwen/Qwen3-32B + train_micro_batch_size: 1 + max_total_sequence_length: 4096 + dtensor_cfg: + enabled: false + optimizer: null + scheduler: null + make_sequence_length_divisible_by: ${policy.megatron_cfg.tensor_model_parallel_size} + megatron_cfg: + enabled: true + empty_unused_memory_level: 1 + tensor_model_parallel_size: 4 + pipeline_model_parallel_size: 1 + sequence_parallel: true + optimizer: + lr: 3.0e-07 + min_lr: 3.0e-08 + scheduler: + lr_warmup_iters: 2 + lr_warmup_init: 3.0e-08 + generation: + vllm_cfg: + tensor_parallel_size: 1 +logger: + log_dir: logs/grpo-qwen3-32b-4n4g + wandb_enabled: true + tensorboard_enabled: true + wandb: + project: nemo-rl + name: grpo-qwen3-32b-4n4g +cluster: + gpus_per_node: 4 + num_nodes: 4 + diff --git a/tests/test_suites/llm/performance/grpo-llama3.1-8b-instruct-2n4g.sh b/tests/test_suites/llm/performance/grpo-llama3.1-8b-instruct-2n4g.sh new file mode 100755 index 0000000000..e7636f3e93 --- /dev/null +++ b/tests/test_suites/llm/performance/grpo-llama3.1-8b-instruct-2n4g.sh @@ -0,0 +1,39 @@ +#!/bin/bash +SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd) +source $SCRIPT_DIR/common.env + +# ===== BEGIN CONFIG ===== 
+NUM_NODES=2 +STEPS_PER_RUN=10 +MAX_STEPS=10 +NUM_RUNS=$(( (MAX_STEPS + STEPS_PER_RUN - 1) / STEPS_PER_RUN )) # Round up +NUM_MINUTES=100 +# ===== END CONFIG ===== + +exit_if_max_steps_reached + +# Run the experiment +cd $PROJECT_ROOT +uv run examples/run_grpo_math.py \ + --config $CONFIG_PATH \ + grpo.max_num_steps=$MAX_STEPS \ + logger.log_dir=$LOG_DIR \ + logger.wandb_enabled=True \ + logger.wandb.project=nemo-rl \ + logger.wandb.name=$EXP_NAME \ + logger.monitor_gpus=True \ + logger.tensorboard_enabled=True \ + checkpointing.enabled=True \ + checkpointing.checkpoint_dir=$CKPT_DIR \ + $@ \ + 2>&1 | tee $RUN_LOG + +# Convert tensorboard logs to json +uv run tests/json_dump_tb_logs.py $LOG_DIR --output_path $JSON_METRICS + +# Only run metrics if the target step is reached +if [[ $(jq 'to_entries | .[] | select(.key == "train/loss") | .value | keys | map(tonumber) | max' $JSON_METRICS) -ge $MAX_STEPS ]]; then + uv run tests/check_metrics.py $JSON_METRICS \ + 'mean(data["train/token_mult_prob_error"]) < 1.1' \ + 'data["train/token_mult_prob_error"]["10"] < 1.1' +fi diff --git a/tests/test_suites/llm/performance/grpo-qwen3-30ba3b-4n4g.sh b/tests/test_suites/llm/performance/grpo-qwen3-30ba3b-4n4g.sh new file mode 100755 index 0000000000..2a56609ffd --- /dev/null +++ b/tests/test_suites/llm/performance/grpo-qwen3-30ba3b-4n4g.sh @@ -0,0 +1,40 @@ +#!/bin/bash +SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd) +source $SCRIPT_DIR/common.env + +# ===== BEGIN CONFIG ===== +NUM_NODES=4 +STEPS_PER_RUN=10 +MAX_STEPS=10 +NUM_RUNS=$(( (MAX_STEPS + STEPS_PER_RUN - 1) / STEPS_PER_RUN )) # Round up +NUM_MINUTES=100 +# ===== END CONFIG ===== + +exit_if_max_steps_reached + +# Run the experiment +cd $PROJECT_ROOT +uv run examples/run_grpo_math.py \ + --config $CONFIG_PATH \ + grpo.max_num_steps=$MAX_STEPS \ + logger.log_dir=$LOG_DIR \ + logger.wandb_enabled=True \ + logger.wandb.project=nemo-rl \ + logger.wandb.name=$EXP_NAME \ + logger.monitor_gpus=True \ 
+ logger.tensorboard_enabled=True \ + checkpointing.enabled=True \ + checkpointing.checkpoint_dir=$CKPT_DIR \ + $@ \ + 2>&1 | tee $RUN_LOG + +# Convert tensorboard logs to json +uv run tests/json_dump_tb_logs.py $LOG_DIR --output_path $JSON_METRICS + +# Only run metrics if the target step is reached +if [[ $(jq 'to_entries | .[] | select(.key == "train/loss") | .value | keys | map(tonumber) | max' $JSON_METRICS) -ge $MAX_STEPS ]]; then + uv run tests/check_metrics.py $JSON_METRICS \ + 'mean(data["train/token_mult_prob_error"]) < 1.1' \ + 'data["train/token_mult_prob_error"]["10"] < 1.1' +fi + diff --git a/tests/test_suites/llm/performance/grpo-qwen3-32b-4n4g.sh b/tests/test_suites/llm/performance/grpo-qwen3-32b-4n4g.sh new file mode 100755 index 0000000000..2a56609ffd --- /dev/null +++ b/tests/test_suites/llm/performance/grpo-qwen3-32b-4n4g.sh @@ -0,0 +1,40 @@ +#!/bin/bash +SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd) +source $SCRIPT_DIR/common.env + +# ===== BEGIN CONFIG ===== +NUM_NODES=4 +STEPS_PER_RUN=10 +MAX_STEPS=10 +NUM_RUNS=$(( (MAX_STEPS + STEPS_PER_RUN - 1) / STEPS_PER_RUN )) # Round up +NUM_MINUTES=100 +# ===== END CONFIG ===== + +exit_if_max_steps_reached + +# Run the experiment +cd $PROJECT_ROOT +uv run examples/run_grpo_math.py \ + --config $CONFIG_PATH \ + grpo.max_num_steps=$MAX_STEPS \ + logger.log_dir=$LOG_DIR \ + logger.wandb_enabled=True \ + logger.wandb.project=nemo-rl \ + logger.wandb.name=$EXP_NAME \ + logger.monitor_gpus=True \ + logger.tensorboard_enabled=True \ + checkpointing.enabled=True \ + checkpointing.checkpoint_dir=$CKPT_DIR \ + $@ \ + 2>&1 | tee $RUN_LOG + +# Convert tensorboard logs to json +uv run tests/json_dump_tb_logs.py $LOG_DIR --output_path $JSON_METRICS + +# Only run metrics if the target step is reached +if [[ $(jq 'to_entries | .[] | select(.key == "train/loss") | .value | keys | map(tonumber) | max' $JSON_METRICS) -ge $MAX_STEPS ]]; then + uv run tests/check_metrics.py $JSON_METRICS \ + 
'mean(data["train/token_mult_prob_error"]) < 1.1' \ + 'data["train/token_mult_prob_error"]["10"] < 1.1' +fi + diff --git a/tests/test_suites/performance.txt b/tests/test_suites/performance.txt index bf714b0e74..7468fab4d3 100644 --- a/tests/test_suites/performance.txt +++ b/tests/test_suites/performance.txt @@ -2,6 +2,7 @@ # GRPO # ######## +# H100 (8 GPUs/node) tests/test_suites/llm/performance/grpo-llama3.1-8b-instruct-2n8g.sh tests/test_suites/llm/performance/grpo-qwen3-30ba3b-4n8g.sh tests/test_suites/llm/performance/grpo-deepseek-v3-32n8g.sh @@ -14,4 +15,12 @@ tests/test_suites/llm/performance/grpo-qwen3-32b-8n8g-async-1off.sh tests/test_suites/llm/performance/grpo-qwen3-235b-32n8g-async-1off.sh tests/test_suites/llm/performance/grpo-qwen3-30ba3b-4n8g-async-1off.sh -tests/test_suites/llm/performance/grpo-qwen3-30ba3b-24n8g-async-8off.sh \ No newline at end of file +<<<<<<< HEAD +tests/test_suites/llm/performance/grpo-qwen3-30ba3b-24n8g-async-8off.sh +======= + +# GB200 (4 GPUs/node) +tests/test_suites/llm/performance/grpo-llama3.1-8b-instruct-2n4g.sh +tests/test_suites/llm/performance/grpo-qwen3-32b-4n4g.sh +tests/test_suites/llm/performance/grpo-qwen3-30ba3b-4n4g.sh +>>>>>>> 878206e8 (feat: Add GB200 perf recipes for llama3-8b, qwen3-30ba3,qwen3-32b) From 9c2bc1c965e62dfb4623e00a9bda11fb4c218632 Mon Sep 17 00:00:00 2001 From: Seonjin Na Date: Thu, 18 Dec 2025 17:12:31 -0800 Subject: [PATCH 2/3] feat: Add GB200 perf recipes for llama3-8b, qwen3-30ba3,qwen3-32b --- tests/test_suites/performance.txt | 3 --- 1 file changed, 3 deletions(-) diff --git a/tests/test_suites/performance.txt b/tests/test_suites/performance.txt index 7468fab4d3..33c0c99686 100644 --- a/tests/test_suites/performance.txt +++ b/tests/test_suites/performance.txt @@ -15,12 +15,9 @@ tests/test_suites/llm/performance/grpo-qwen3-32b-8n8g-async-1off.sh tests/test_suites/llm/performance/grpo-qwen3-235b-32n8g-async-1off.sh tests/test_suites/llm/performance/grpo-qwen3-30ba3b-4n8g-async-1off.sh 
-<<<<<<< HEAD tests/test_suites/llm/performance/grpo-qwen3-30ba3b-24n8g-async-8off.sh -======= # GB200 (4 GPUs/node) tests/test_suites/llm/performance/grpo-llama3.1-8b-instruct-2n4g.sh tests/test_suites/llm/performance/grpo-qwen3-32b-4n4g.sh tests/test_suites/llm/performance/grpo-qwen3-30ba3b-4n4g.sh ->>>>>>> 878206e8 (feat: Add GB200 perf recipes for llama3-8b, qwen3-30ba3,qwen3-32b) From 70c1c395d47d6e38c8d868ab0d59d488ea47d5df Mon Sep 17 00:00:00 2001 From: Seonjin Na Date: Fri, 19 Dec 2025 14:27:48 -0800 Subject: [PATCH 3/3] feat: Add Async 1-off GB200 perf recipes for llama3-8b, qwen3-30ba3b, qwen3-32b --- ...-llama3.1-8b-instruct-2n4g-async-1off.yaml | 32 +++++++++++++++ .../grpo-qwen3-30ba3b-8n4g-async-1off.yaml | 33 +++++++++++++++ .../llm/performance/grpo-qwen3-32b-4n4g.yaml | 2 +- .../grpo-qwen3-32b-8n4g-async-1off.yaml | 33 +++++++++++++++ ...po-llama3.1-8b-instruct-2n4g-async-1off.sh | 39 ++++++++++++++++++ .../grpo-qwen3-30ba3b-8n4g-async-1off.sh | 40 +++++++++++++++++++ .../grpo-qwen3-32b-8n4g-async-1off.sh | 39 ++++++++++++++++++ tests/test_suites/performance.txt | 3 ++ 8 files changed, 220 insertions(+), 1 deletion(-) create mode 100644 examples/configs/recipes/llm/performance/grpo-llama3.1-8b-instruct-2n4g-async-1off.yaml create mode 100644 examples/configs/recipes/llm/performance/grpo-qwen3-30ba3b-8n4g-async-1off.yaml create mode 100644 examples/configs/recipes/llm/performance/grpo-qwen3-32b-8n4g-async-1off.yaml create mode 100755 tests/test_suites/llm/performance/grpo-llama3.1-8b-instruct-2n4g-async-1off.sh create mode 100755 tests/test_suites/llm/performance/grpo-qwen3-30ba3b-8n4g-async-1off.sh create mode 100755 tests/test_suites/llm/performance/grpo-qwen3-32b-8n4g-async-1off.sh diff --git a/examples/configs/recipes/llm/performance/grpo-llama3.1-8b-instruct-2n4g-async-1off.yaml b/examples/configs/recipes/llm/performance/grpo-llama3.1-8b-instruct-2n4g-async-1off.yaml new file mode 100644 index 0000000000..d906eda2b4 --- /dev/null +++ 
b/examples/configs/recipes/llm/performance/grpo-llama3.1-8b-instruct-2n4g-async-1off.yaml @@ -0,0 +1,32 @@ +defaults: ./grpo-llama3.1-8b-instruct-2n4g.yaml +grpo: + async_grpo: + enabled: true + max_trajectory_age_steps: 1 + in_flight_weight_updates: true +loss_fn: + use_importance_sampling_correction: true +checkpointing: + checkpoint_dir: results/grpo-llama3.1-8b-instruct-2n4g-async-1off +policy: + megatron_cfg: + tensor_model_parallel_size: 1 + pipeline_model_parallel_size: 1 + sequence_parallel: false + generation: + colocated: + enabled: false + resources: + num_nodes: 1 + gpus_per_node: 4 + vllm_cfg: + async_engine: true + tensor_parallel_size: 1 + gpu_memory_utilization: 0.8 +logger: + log_dir: logs/grpo-llama3.1-8b-instruct-2n4g-async-1off + wandb: + name: grpo-llama3.1-8b-instruct-2n4g-async-1off +cluster: + gpus_per_node: 4 + num_nodes: 2 diff --git a/examples/configs/recipes/llm/performance/grpo-qwen3-30ba3b-8n4g-async-1off.yaml b/examples/configs/recipes/llm/performance/grpo-qwen3-30ba3b-8n4g-async-1off.yaml new file mode 100644 index 0000000000..a9837c87f2 --- /dev/null +++ b/examples/configs/recipes/llm/performance/grpo-qwen3-30ba3b-8n4g-async-1off.yaml @@ -0,0 +1,33 @@ +defaults: ./grpo-qwen3-30ba3b-4n4g.yaml +grpo: + async_grpo: + enabled: true + max_trajectory_age_steps: 1 + in_flight_weight_updates: true +loss_fn: + use_importance_sampling_correction: true +checkpointing: + checkpoint_dir: results/grpo-qwen3-30ba3b-8n4g-async-1off +policy: + megatron_cfg: + tensor_model_parallel_size: 1 + pipeline_model_parallel_size: 1 + expert_model_parallel_size: 16 + sequence_parallel: false + generation: + colocated: + enabled: false + resources: + num_nodes: 4 + gpus_per_node: 4 + vllm_cfg: + async_engine: true + tensor_parallel_size: 1 + gpu_memory_utilization: 0.8 +logger: + log_dir: logs/grpo-qwen3-30ba3b-8n4g-async-1off + wandb: + name: grpo-qwen3-30ba3b-8n4g-async-1off +cluster: + gpus_per_node: 4 + num_nodes: 8 diff --git 
a/examples/configs/recipes/llm/performance/grpo-qwen3-32b-4n4g.yaml b/examples/configs/recipes/llm/performance/grpo-qwen3-32b-4n4g.yaml index 9b98877b18..2e441cdb5f 100644 --- a/examples/configs/recipes/llm/performance/grpo-qwen3-32b-4n4g.yaml +++ b/examples/configs/recipes/llm/performance/grpo-qwen3-32b-4n4g.yaml @@ -17,7 +17,7 @@ policy: megatron_cfg: enabled: true empty_unused_memory_level: 1 - tensor_model_parallel_size: 4 + tensor_model_parallel_size: 2 pipeline_model_parallel_size: 1 sequence_parallel: true optimizer: diff --git a/examples/configs/recipes/llm/performance/grpo-qwen3-32b-8n4g-async-1off.yaml b/examples/configs/recipes/llm/performance/grpo-qwen3-32b-8n4g-async-1off.yaml new file mode 100644 index 0000000000..4f8a0a03bb --- /dev/null +++ b/examples/configs/recipes/llm/performance/grpo-qwen3-32b-8n4g-async-1off.yaml @@ -0,0 +1,33 @@ +defaults: ./grpo-qwen3-32b-4n4g.yaml +grpo: + async_grpo: + enabled: true + max_trajectory_age_steps: 1 + in_flight_weight_updates: true +loss_fn: + use_importance_sampling_correction: true +checkpointing: + checkpoint_dir: results/grpo-qwen3-32b-8n4g-async-1off +policy: + megatron_cfg: + tensor_model_parallel_size: 2 + pipeline_model_parallel_size: 1 + sequence_parallel: true + generation: + colocated: + enabled: false + resources: + num_nodes: 4 + gpus_per_node: 4 + vllm_cfg: + async_engine: true + tensor_parallel_size: 1 + gpu_memory_utilization: 0.8 +logger: + log_dir: logs/grpo-qwen3-32b-8n4g-async-1off + wandb: + name: grpo-qwen3-32b-8n4g-async-1off +cluster: + gpus_per_node: 4 + num_nodes: 8 + diff --git a/tests/test_suites/llm/performance/grpo-llama3.1-8b-instruct-2n4g-async-1off.sh b/tests/test_suites/llm/performance/grpo-llama3.1-8b-instruct-2n4g-async-1off.sh new file mode 100755 index 0000000000..e7636f3e93 --- /dev/null +++ b/tests/test_suites/llm/performance/grpo-llama3.1-8b-instruct-2n4g-async-1off.sh @@ -0,0 +1,39 @@ +#!/bin/bash +SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null 
&& pwd) +source $SCRIPT_DIR/common.env + +# ===== BEGIN CONFIG ===== +NUM_NODES=2 +STEPS_PER_RUN=10 +MAX_STEPS=10 +NUM_RUNS=$(( (MAX_STEPS + STEPS_PER_RUN - 1) / STEPS_PER_RUN )) # Round up +NUM_MINUTES=100 +# ===== END CONFIG ===== + +exit_if_max_steps_reached + +# Run the experiment +cd $PROJECT_ROOT +uv run examples/run_grpo_math.py \ + --config $CONFIG_PATH \ + grpo.max_num_steps=$MAX_STEPS \ + logger.log_dir=$LOG_DIR \ + logger.wandb_enabled=True \ + logger.wandb.project=nemo-rl \ + logger.wandb.name=$EXP_NAME \ + logger.monitor_gpus=True \ + logger.tensorboard_enabled=True \ + checkpointing.enabled=True \ + checkpointing.checkpoint_dir=$CKPT_DIR \ + $@ \ + 2>&1 | tee $RUN_LOG + +# Convert tensorboard logs to json +uv run tests/json_dump_tb_logs.py $LOG_DIR --output_path $JSON_METRICS + +# Only run metrics if the target step is reached +if [[ $(jq 'to_entries | .[] | select(.key == "train/loss") | .value | keys | map(tonumber) | max' $JSON_METRICS) -ge $MAX_STEPS ]]; then + uv run tests/check_metrics.py $JSON_METRICS \ + 'mean(data["train/token_mult_prob_error"]) < 1.1' \ + 'data["train/token_mult_prob_error"]["10"] < 1.1' +fi diff --git a/tests/test_suites/llm/performance/grpo-qwen3-30ba3b-8n4g-async-1off.sh b/tests/test_suites/llm/performance/grpo-qwen3-30ba3b-8n4g-async-1off.sh new file mode 100755 index 0000000000..8350d128e8 --- /dev/null +++ b/tests/test_suites/llm/performance/grpo-qwen3-30ba3b-8n4g-async-1off.sh @@ -0,0 +1,40 @@ +#!/bin/bash +SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd) +source $SCRIPT_DIR/common.env + +# ===== BEGIN CONFIG ===== +NUM_NODES=8 +STEPS_PER_RUN=10 +MAX_STEPS=10 +NUM_RUNS=$(( (MAX_STEPS + STEPS_PER_RUN - 1) / STEPS_PER_RUN )) # Round up +NUM_MINUTES=100 +# ===== END CONFIG ===== + +exit_if_max_steps_reached + +# Run the experiment +cd $PROJECT_ROOT +uv run examples/run_grpo_math.py \ + --config $CONFIG_PATH \ + grpo.max_num_steps=$MAX_STEPS \ + logger.log_dir=$LOG_DIR \ + 
logger.wandb_enabled=True \ + logger.wandb.project=nemo-rl \ + logger.wandb.name=$EXP_NAME \ + logger.monitor_gpus=True \ + logger.tensorboard_enabled=True \ + checkpointing.enabled=True \ + checkpointing.checkpoint_dir=$CKPT_DIR \ + $@ \ + 2>&1 | tee $RUN_LOG + +# Convert tensorboard logs to json +uv run tests/json_dump_tb_logs.py $LOG_DIR --output_path $JSON_METRICS + +# Only run metrics if the target step is reached +if [[ $(jq 'to_entries | .[] | select(.key == "train/loss") | .value | keys | map(tonumber) | max' $JSON_METRICS) -ge $MAX_STEPS ]]; then + uv run tests/check_metrics.py $JSON_METRICS \ + 'mean(data["train/token_mult_prob_error"]) < 1.1' \ + 'data["train/token_mult_prob_error"]["10"] < 1.1' +fi + diff --git a/tests/test_suites/llm/performance/grpo-qwen3-32b-8n4g-async-1off.sh b/tests/test_suites/llm/performance/grpo-qwen3-32b-8n4g-async-1off.sh new file mode 100755 index 0000000000..35d58c98f7 --- /dev/null +++ b/tests/test_suites/llm/performance/grpo-qwen3-32b-8n4g-async-1off.sh @@ -0,0 +1,39 @@ +#!/bin/bash +SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd) +source $SCRIPT_DIR/common.env + +# ===== BEGIN CONFIG ===== +NUM_NODES=8 +STEPS_PER_RUN=10 +MAX_STEPS=10 +NUM_RUNS=$(( (MAX_STEPS + STEPS_PER_RUN - 1) / STEPS_PER_RUN )) # Round up +NUM_MINUTES=100 +# ===== END CONFIG ===== + +exit_if_max_steps_reached + +# Run the experiment +cd $PROJECT_ROOT +uv run examples/run_grpo_math.py \ + --config $CONFIG_PATH \ + grpo.max_num_steps=$MAX_STEPS \ + logger.log_dir=$LOG_DIR \ + logger.wandb_enabled=True \ + logger.wandb.project=nemo-rl \ + logger.wandb.name=$EXP_NAME \ + logger.monitor_gpus=True \ + logger.tensorboard_enabled=True \ + checkpointing.enabled=True \ + checkpointing.checkpoint_dir=$CKPT_DIR \ + $@ \ + 2>&1 | tee $RUN_LOG + +# Convert tensorboard logs to json +uv run tests/json_dump_tb_logs.py $LOG_DIR --output_path $JSON_METRICS + +# Only run metrics if the target step is reached +if [[ $(jq 'to_entries | .[] | 
select(.key == "train/loss") | .value | keys | map(tonumber) | max' $JSON_METRICS) -ge $MAX_STEPS ]]; then + uv run tests/check_metrics.py $JSON_METRICS \ + 'mean(data["train/token_mult_prob_error"]) < 1.1' \ + 'data["train/token_mult_prob_error"]["10"] < 1.1' +fi diff --git a/tests/test_suites/performance.txt b/tests/test_suites/performance.txt index 1199421349..2bc3e13efd 100644 --- a/tests/test_suites/performance.txt +++ b/tests/test_suites/performance.txt @@ -37,5 +37,8 @@ tests/test_suites/llm/performance/grpo-deepseek-v3-32n4g.sh tests/test_suites/llm/performance/grpo-qwen3-235b-16n4g.sh ## ASYNC 1-off +tests/test_suites/llm/performance/grpo-llama3.1-8b-instruct-2n4g-async-1off.sh +tests/test_suites/llm/performance/grpo-qwen3-32b-8n4g-async-1off.sh +tests/test_suites/llm/performance/grpo-qwen3-30ba3b-8n4g-async-1off.sh tests/test_suites/llm/performance/grpo-deepseek-v3-64n4g-async-1off.sh tests/test_suites/llm/performance/grpo-qwen3-235b-32n4g-async-1off.sh