diff --git a/examples/configs/recipes/llm/grpo-llama3.1-8b-instruct-2n8g-fsdp2tp1-noncolocated.yaml b/examples/configs/recipes/llm/grpo-llama3.1-8b-instruct-2n8g-fsdp2tp1-noncolocated.yaml new file mode 100644 index 0000000000..bb8a06a0a0 --- /dev/null +++ b/examples/configs/recipes/llm/grpo-llama3.1-8b-instruct-2n8g-fsdp2tp1-noncolocated.yaml @@ -0,0 +1,130 @@ +grpo: + num_prompts_per_step: 64 + num_generations_per_prompt: 32 + max_rollout_turns: 1 + max_num_steps: 500 + normalize_rewards: true + use_leave_one_out_baseline: true + val_period: 10 + val_at_start: false + max_val_samples: 256 + val_batch_size: 256 + seed: 42 +loss_fn: + reference_policy_kl_penalty: 0.01 + ratio_clip_min: 0.2 + ratio_clip_max: 0.2 + ratio_clip_c: null + use_on_policy_kl_approximation: false + use_importance_sampling_correction: false + token_level_loss: true +checkpointing: + enabled: true + checkpoint_dir: results/grpo-llama3.1-8b-instruct-2n8g-fsdp2tp1-noncolocated + metric_name: val_reward + higher_is_better: true + keep_top_k: 3 + save_period: 10 + checkpoint_must_save_by: null +policy: + model_name: meta-llama/Llama-3.1-8B-Instruct + tokenizer: + name: meta-llama/Llama-3.1-8B-Instruct + train_global_batch_size: 512 + train_micro_batch_size: 1 + generation_batch_size: 32 + logprob_batch_size: 2 + max_total_sequence_length: 4096 + precision: bfloat16 + dtensor_cfg: + enabled: true + cpu_offload: false + sequence_parallel: false + activation_checkpointing: false + tensor_parallel_size: 1 + context_parallel_size: 1 + custom_parallel_plan: null + dynamic_batching: + enabled: True + train_mb_tokens: ${mul:${policy.max_total_sequence_length}, ${policy.train_micro_batch_size}} + logprob_mb_tokens: ${mul:${policy.max_total_sequence_length}, ${policy.logprob_batch_size}} + sequence_length_round: 64 + sequence_packing: + enabled: false + train_mb_tokens: ${mul:${policy.max_total_sequence_length}, ${policy.train_micro_batch_size}} + logprob_mb_tokens: ${mul:${policy.max_total_sequence_length}, ${policy.logprob_batch_size}} + algorithm: "modified_first_fit_decreasing" + sequence_length_round: 64 + make_sequence_length_divisible_by: 1 + max_grad_norm: 1 + optimizer: + name: torch.optim.AdamW + kwargs: + lr: 3e-07 + weight_decay: 0.01 + betas: + - 0.9 + - 0.999 + eps: 1e-08 + foreach: false + fused: false + scheduler: + - name: torch.optim.lr_scheduler.LinearLR + kwargs: + start_factor: 0.1 + end_factor: 1 + total_iters: 13 + - name: torch.optim.lr_scheduler.ConstantLR + kwargs: + factor: 1 + total_iters: 10000000000 + - milestones: + - 13 + generation: + backend: vllm + max_new_tokens: 4096 + temperature: 1 + top_p: 1 + top_k: null + stop_token_ids: + - 128009 + stop_strings: null + vllm_cfg: + async_engine: true + precision: ${policy.precision} + tensor_parallel_size: 1 + pipeline_parallel_size: 1 + gpu_memory_utilization: 0.6 + max_model_len: 4096 + enforce_eager: False + colocated: + enabled: false + resources: + gpus_per_node: null + num_nodes: 1 +data: + max_input_seq_length: 4096 + prompt_file: examples/prompts/cot.txt + system_prompt_file: null + dataset_name: OpenMathInstruct-2 + shuffle: true +env: + math: + num_workers: 8 +logger: + log_dir: logs/grpo-llama3.1-8b-instruct-2n8g-fsdp2tp1-noncolocated + num_val_samples_to_print: 0 + wandb_enabled: true + tensorboard_enabled: true + mlflow_enabled: false + monitor_gpus: true + wandb: + project: nemo-rl + name: grpo-llama3.1-8b-instruct-2n8g-fsdp2tp1-noncolocated + tensorboard: {} + gpu_monitoring: + collection_interval: 10 + flush_interval: 10 +cluster: + gpus_per_node: 8 + num_nodes: 2 diff --git a/tests/test_suites/llm/dpo-llama3.1-8b-instruct-4n8g-fsdp2tp2-quick.v2.sh b/tests/test_suites/llm/dpo-llama3.1-8b-instruct-4n8g-fsdp2tp2-quick.v2.sh index 0162bd8bb9..a8d2d04adc 100755 --- a/tests/test_suites/llm/dpo-llama3.1-8b-instruct-4n8g-fsdp2tp2-quick.v2.sh +++ b/tests/test_suites/llm/dpo-llama3.1-8b-instruct-4n8g-fsdp2tp2-quick.v2.sh @@ -40,4 +40,4 @@ if [[ $(jq 'to_entries | .[] | select(.key == "train/loss") | .value | keys | ma 'data["train/preference_loss"]["1"] < 0.69316' \ 'data["train/preference_loss"]["20"] < 0.6' \ 'mean(data["timing/train/total_step_time"], -10, -1) < 7.8' -fi +fi diff --git a/tests/test_suites/llm/dpo-llama3.1-8b-instruct-4n8g-fsdp2tp4.sh b/tests/test_suites/llm/dpo-llama3.1-8b-instruct-4n8g-fsdp2tp4.sh index df74127ba2..fbda6865f5 100755 --- a/tests/test_suites/llm/dpo-llama3.1-8b-instruct-4n8g-fsdp2tp4.sh +++ b/tests/test_suites/llm/dpo-llama3.1-8b-instruct-4n8g-fsdp2tp4.sh @@ -40,4 +40,4 @@ if [[ $(jq 'to_entries | .[] | select(.key == "train/loss") | .value | keys | ma 'data["train/preference_loss"]["1"] < 0.69316' \ 'data["train/preference_loss"]["150"] < 0.4' \ 'mean(data["timing/train/total_step_time"], -11, -1) < 24' -fi +fi diff --git a/tests/test_suites/llm/dpo-llama3.1-8b-instruct-4n8g-megatron.v2.sh b/tests/test_suites/llm/dpo-llama3.1-8b-instruct-4n8g-megatron.v2.sh index 8701d63d1f..7cc74e26df 100755 --- a/tests/test_suites/llm/dpo-llama3.1-8b-instruct-4n8g-megatron.v2.sh +++ b/tests/test_suites/llm/dpo-llama3.1-8b-instruct-4n8g-megatron.v2.sh @@ -40,4 +40,4 @@ if [[ $(jq 'to_entries | .[] | select(.key == "train/loss") | .value | keys | ma 'data["train/preference_loss"]["1"] < 0.69316' \ 'data["train/preference_loss"]["150"] < 0.4' \ 'mean(data["timing/train/total_step_time"], -11, -1) < 11.5' -fi +fi diff --git a/tests/test_suites/llm/dpo-llama3.1-8b-instruct-4n8g-megatrontp2pp2-quick.sh b/tests/test_suites/llm/dpo-llama3.1-8b-instruct-4n8g-megatrontp2pp2-quick.sh index 0bc8e13e28..497e0b8f68 100755 --- a/tests/test_suites/llm/dpo-llama3.1-8b-instruct-4n8g-megatrontp2pp2-quick.sh +++ b/tests/test_suites/llm/dpo-llama3.1-8b-instruct-4n8g-megatrontp2pp2-quick.sh @@ -40,4 +40,4 @@ if [[ $(jq 'to_entries | .[] | select(.key == "train/loss") | .value | keys | ma 'data["train/preference_loss"]["1"] < 0.69316' \ 'data["train/preference_loss"]["20"] < 0.6' \ 'mean(data["timing/train/total_step_time"], -10) < 6.7' -fi +fi diff --git a/tests/test_suites/llm/dpo-llama3.1-8b-tulu3-1n8g-fsdp2tp1.sh b/tests/test_suites/llm/dpo-llama3.1-8b-tulu3-1n8g-fsdp2tp1.sh index 35da749ac7..a6beabb886 100755 --- a/tests/test_suites/llm/dpo-llama3.1-8b-tulu3-1n8g-fsdp2tp1.sh +++ b/tests/test_suites/llm/dpo-llama3.1-8b-tulu3-1n8g-fsdp2tp1.sh @@ -40,4 +40,3 @@ if [[ $(jq 'to_entries | .[] | select(.key == "train/loss") | .value | keys | ma 'data["train/preference_loss"]["1"] < 0.6932' \ 'data["train/preference_loss"]["150"] < 0.68' fi - diff --git a/tests/test_suites/llm/dpo-llama3.2-1b-instruct-1n8g-fsdp2tp1.v2.sh b/tests/test_suites/llm/dpo-llama3.2-1b-instruct-1n8g-fsdp2tp1.v2.sh index 48691c0df4..0b0c67b312 100755 --- a/tests/test_suites/llm/dpo-llama3.2-1b-instruct-1n8g-fsdp2tp1.v2.sh +++ b/tests/test_suites/llm/dpo-llama3.2-1b-instruct-1n8g-fsdp2tp1.v2.sh @@ -38,4 +38,4 @@ if [[ $(jq 'to_entries | .[] | select(.key == "train/loss") | .value | keys | ma 'data["train/loss"]["1"] < 0.69316' \ 'data["train/loss"]["150"] < 0.55' \ 'mean(data["timing/train/total_step_time"], -11, -1) < 1.3' -fi +fi diff --git a/tests/test_suites/llm/grpo-deepscaler-1.5b-16K.sh b/tests/test_suites/llm/grpo-deepscaler-1.5b-16K.sh index 68c4f0c6b2..633b0d8297 100755 --- a/tests/test_suites/llm/grpo-deepscaler-1.5b-16K.sh +++ b/tests/test_suites/llm/grpo-deepscaler-1.5b-16K.sh @@ -66,4 +66,3 @@ cat ${RUN_LOG}.aime-16k | grep "score=" | sed 's/.*score=\([^ ]*\).*/{"sco # 240 step checkpoint 0.3 uv run tests/check_metrics.py ${RUN_LOG}-16k-metric.json \ 'data["score"] >= 0.2396' - diff --git a/tests/test_suites/llm/grpo-deepscaler-1.5b-24K.sh b/tests/test_suites/llm/grpo-deepscaler-1.5b-24K.sh index b045607570..92dc48f719 100755 --- a/tests/test_suites/llm/grpo-deepscaler-1.5b-24K.sh +++ b/tests/test_suites/llm/grpo-deepscaler-1.5b-24K.sh @@ -65,4 +65,3 @@ cat ${RUN_LOG}.aime-24k | grep "score=" | sed 's/.*score=\([^ ]*\).*/{"sco uv run tests/check_metrics.py ${RUN_LOG}-24k-metric.json \ 'data["score"] >= 0.2396' - diff --git a/tests/test_suites/llm/grpo-gemma3-27b-it-16n8g-fsdp2tp8sp-actckpt-long.sh b/tests/test_suites/llm/grpo-gemma3-27b-it-16n8g-fsdp2tp8sp-actckpt-long.sh index 69c9899ccd..a6ce1800d9 100755 --- a/tests/test_suites/llm/grpo-gemma3-27b-it-16n8g-fsdp2tp8sp-actckpt-long.sh +++ b/tests/test_suites/llm/grpo-gemma3-27b-it-16n8g-fsdp2tp8sp-actckpt-long.sh @@ -37,4 +37,3 @@ if [[ $(jq 'to_entries | .[] | select(.key == "train/loss") | .value | keys | ma 'mean(data["train/token_mult_prob_error"]) < 1.1' \ 'data["train/token_mult_prob_error"]["20"] < 1.1' fi - diff --git a/tests/test_suites/llm/grpo-llama3.1-8b-instruct-1n8g-megatron-fp8.sh b/tests/test_suites/llm/grpo-llama3.1-8b-instruct-1n8g-megatron-fp8.sh index e0b2f928da..31c8e4739e 100755 --- a/tests/test_suites/llm/grpo-llama3.1-8b-instruct-1n8g-megatron-fp8.sh +++ b/tests/test_suites/llm/grpo-llama3.1-8b-instruct-1n8g-megatron-fp8.sh @@ -37,4 +37,3 @@ if [[ $(jq 'to_entries | .[] | select(.key == "train/loss") | .value | keys | ma 'mean(data["train/token_mult_prob_error"]) < 1.1' \ 'data["train/token_mult_prob_error"]["100"] < 1.1' fi - diff --git a/tests/test_suites/llm/grpo-llama3.1-8b-instruct-2n8g-fsdp2tp1-noncolocated.sh b/tests/test_suites/llm/grpo-llama3.1-8b-instruct-2n8g-fsdp2tp1-noncolocated.sh new file mode 100755 index 0000000000..d018032576 --- /dev/null +++ b/tests/test_suites/llm/grpo-llama3.1-8b-instruct-2n8g-fsdp2tp1-noncolocated.sh @@ -0,0 +1,39 @@ +#!/bin/bash +SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd) +source $SCRIPT_DIR/common.env + +# ===== BEGIN CONFIG ===== +NUM_NODES=2 +STEPS_PER_RUN=30 +MAX_STEPS=30 +NUM_RUNS=$(( (MAX_STEPS + STEPS_PER_RUN - 1) / STEPS_PER_RUN )) # Round up +NUM_MINUTES=120 +# ===== END CONFIG ===== + +exit_if_max_steps_reached + +# Run the experiment +cd $PROJECT_ROOT +uv run examples/run_grpo_math.py \ + --config $CONFIG_PATH \ + grpo.max_num_steps=$MAX_STEPS \ + logger.log_dir=$LOG_DIR \ + logger.wandb_enabled=True \ + logger.wandb.project=nemo-rl \ + logger.wandb.name=$EXP_NAME \ + logger.monitor_gpus=True \ + logger.tensorboard_enabled=True \ + checkpointing.enabled=True \ + checkpointing.checkpoint_dir=$CKPT_DIR \ + $@ \ + 2>&1 | tee $RUN_LOG + +# Convert tensorboard logs to json +uv run tests/json_dump_tb_logs.py $LOG_DIR --output_path $JSON_METRICS + +# Only run metrics if the target step is reached +if [[ $(jq 'to_entries | .[] | select(.key == "train/loss") | .value | keys | map(tonumber) | max' $JSON_METRICS) -ge $MAX_STEPS ]]; then + uv run tests/check_metrics.py $JSON_METRICS \ + 'mean(data["train/token_mult_prob_error"]) < 1.1' \ + 'data["train/token_mult_prob_error"]["30"] < 1.1' +fi diff --git a/tests/test_suites/llm/grpo-llama3.1-8b-instruct-4n8g-fsdp2tp1-long.v3.sh b/tests/test_suites/llm/grpo-llama3.1-8b-instruct-4n8g-fsdp2tp1-long.v3.sh index 6e64876058..af44d060cb 100755 --- a/tests/test_suites/llm/grpo-llama3.1-8b-instruct-4n8g-fsdp2tp1-long.v3.sh +++ b/tests/test_suites/llm/grpo-llama3.1-8b-instruct-4n8g-fsdp2tp1-long.v3.sh @@ -37,4 +37,3 @@ if [[ $(jq 'to_entries | .[] | select(.key == "train/loss") | .value | keys | ma 'mean(data["train/token_mult_prob_error"]) < 1.1' \ 'data["train/token_mult_prob_error"]["100"] < 1.1' fi - diff --git a/tests/test_suites/llm/grpo-llama3.2-1b-instruct-1n8g-fsdp2tp1.v3.sh b/tests/test_suites/llm/grpo-llama3.2-1b-instruct-1n8g-fsdp2tp1.v3.sh index 3661370fa6..562ff730e7 100755 --- a/tests/test_suites/llm/grpo-llama3.2-1b-instruct-1n8g-fsdp2tp1.v3.sh +++ b/tests/test_suites/llm/grpo-llama3.2-1b-instruct-1n8g-fsdp2tp1.v3.sh @@ -38,4 +38,3 @@ if [[ $(jq 'to_entries | .[] | select(.key == "train/loss") | .value | keys | ma 'data["train/token_mult_prob_error"]["500"] < 1.1' \ 'mean(data["timing/train/total_step_time"], -6, -1) < 10' fi - diff --git a/tests/test_suites/llm/grpo-llama3.2-1b-instruct-1n8g-megatron.sh b/tests/test_suites/llm/grpo-llama3.2-1b-instruct-1n8g-megatron.sh index 83071c70e3..90e309e128 100755 --- a/tests/test_suites/llm/grpo-llama3.2-1b-instruct-1n8g-megatron.sh +++ b/tests/test_suites/llm/grpo-llama3.2-1b-instruct-1n8g-megatron.sh @@ -38,5 +38,4 @@ if [[ $(jq 'to_entries | .[] | select(.key == "train/loss") | .value | keys | ma 'data["train/token_mult_prob_error"]["500"] < 1.1' \ 'data["train/reward"]["500"] > 0.1' \ 'mean(data["timing/train/total_step_time"], -6, -1) < 10.5' - fi diff --git a/tests/test_suites/llm/grpo-qwen2.5-32b-32n8g-fsdp2tp8sp-actckpt-long.v3.sh b/tests/test_suites/llm/grpo-qwen2.5-32b-32n8g-fsdp2tp8sp-actckpt-long.v3.sh index 7d1fdc2858..fa7fbd5bd6 100755 --- a/tests/test_suites/llm/grpo-qwen2.5-32b-32n8g-fsdp2tp8sp-actckpt-long.v3.sh +++ b/tests/test_suites/llm/grpo-qwen2.5-32b-32n8g-fsdp2tp8sp-actckpt-long.v3.sh @@ -37,4 +37,3 @@ if [[ $(jq 'to_entries | .[] | select(.key == "train/loss") | .value | keys | ma 'mean(data["train/token_mult_prob_error"]) < 1.1' \ 'data["train/token_mult_prob_error"]["20"] < 1.1' fi - diff --git a/tests/test_suites/llm/grpo-qwen2.5-32b-32n8g-fsdp2tp8sp-actckpt.v3.sh b/tests/test_suites/llm/grpo-qwen2.5-32b-32n8g-fsdp2tp8sp-actckpt.v3.sh index e96f3de7fa..98591ba9b3 100755 --- a/tests/test_suites/llm/grpo-qwen2.5-32b-32n8g-fsdp2tp8sp-actckpt.v3.sh +++ b/tests/test_suites/llm/grpo-qwen2.5-32b-32n8g-fsdp2tp8sp-actckpt.v3.sh @@ -37,4 +37,3 @@ if [[ $(jq 'to_entries | .[] | select(.key == "train/loss") | .value | keys | ma 'mean(data["train/token_mult_prob_error"]) < 1.1' \ 'data["train/token_mult_prob_error"]["2"] < 1.1' fi - diff --git a/tests/test_suites/llm/grpo-qwen2.5-7b-instruct-4n8g-fsdp2tp4sp.v3.sh b/tests/test_suites/llm/grpo-qwen2.5-7b-instruct-4n8g-fsdp2tp4sp.v3.sh index b3071fb58e..ec613562f2 100755 --- a/tests/test_suites/llm/grpo-qwen2.5-7b-instruct-4n8g-fsdp2tp4sp.v3.sh +++ b/tests/test_suites/llm/grpo-qwen2.5-7b-instruct-4n8g-fsdp2tp4sp.v3.sh @@ -37,4 +37,3 @@ if [[ $(jq 'to_entries | .[] | select(.key == "train/loss") | .value | keys | ma 'mean(data["train/token_mult_prob_error"]) < 1.1' \ 'data["train/token_mult_prob_error"]["30"] < 1.1' fi - diff --git a/tests/test_suites/llm/grpo-qwen2.5-math-1.5b-instruct-1n8g-fsdp2tp1.v3.sh b/tests/test_suites/llm/grpo-qwen2.5-math-1.5b-instruct-1n8g-fsdp2tp1.v3.sh index 0a31e74590..35810c4eec 100755 --- a/tests/test_suites/llm/grpo-qwen2.5-math-1.5b-instruct-1n8g-fsdp2tp1.v3.sh +++ b/tests/test_suites/llm/grpo-qwen2.5-math-1.5b-instruct-1n8g-fsdp2tp1.v3.sh @@ -38,4 +38,3 @@ if [[ $(jq 'to_entries | .[] | select(.key == "train/loss") | .value | keys | ma 'data["train/token_mult_prob_error"]["450"] < 1.1' \ 'mean(data["timing/train/total_step_time"], 2) < 25' fi - diff --git a/tests/test_suites/llm/sft-llama3.1-8b-1n8g-fsdp2tp1-dynamicbatch.sh b/tests/test_suites/llm/sft-llama3.1-8b-1n8g-fsdp2tp1-dynamicbatch.sh index 76c600c648..3395f54f43 100755 --- a/tests/test_suites/llm/sft-llama3.1-8b-1n8g-fsdp2tp1-dynamicbatch.sh +++ b/tests/test_suites/llm/sft-llama3.1-8b-1n8g-fsdp2tp1-dynamicbatch.sh @@ -35,7 +35,7 @@ uv run tests/json_dump_tb_logs.py $LOG_DIR --output_path $JSON_METRICS # Only run metrics if the target step is reached if [[ $(jq 'to_entries | .[] | select(.key == "train/loss") | .value | keys | map(tonumber) | max' $JSON_METRICS) -ge $MAX_STEPS ]]; then uv run tests/check_metrics.py $JSON_METRICS \ - 'data["train/loss"]["1"] < 0.6' \ + 'data["train/loss"]["1"] < 0.6' \ 'data["train/loss"]["250"] < 0.36' \ 'max(data["ray/node.0.gpu.0.mem_gb"]) < 70' \ 'mean(data["timing/train/total_step_time"], 2) < 10' diff --git a/tests/test_suites/llm/sft-llama3.1-8b-1n8g-fsdp2tp1-long.sh b/tests/test_suites/llm/sft-llama3.1-8b-1n8g-fsdp2tp1-long.sh index 90fd03467c..4b243e8fe9 100755 --- a/tests/test_suites/llm/sft-llama3.1-8b-1n8g-fsdp2tp1-long.sh +++ b/tests/test_suites/llm/sft-llama3.1-8b-1n8g-fsdp2tp1-long.sh @@ -37,6 +37,6 @@ if [[ $(jq 'to_entries | .[] | select(.key == "train/loss") | .value | keys | ma uv run tests/check_metrics.py $JSON_METRICS \ 'data["train/loss"]["1"] < 0.6' \ 'data["train/loss"]["250"] < 0.36' \ - 'max(data["ray/node.0.gpu.0.mem_gb"]) < 80' \ + 'max(data["ray/node.0.gpu.0.mem_gb"]) < 80' \ 'mean(data["timing/train/total_step_time"], 2) < 22' fi diff --git a/tests/test_suites/llm/sft-llama3.1-8b-1n8g-fsdp2tp2sp.sh b/tests/test_suites/llm/sft-llama3.1-8b-1n8g-fsdp2tp2sp.sh index 8f69d0f0b8..87ca1e9dad 100755 --- a/tests/test_suites/llm/sft-llama3.1-8b-1n8g-fsdp2tp2sp.sh +++ b/tests/test_suites/llm/sft-llama3.1-8b-1n8g-fsdp2tp2sp.sh @@ -40,4 +40,4 @@ if [[ $(jq 'to_entries | .[] | select(.key == "train/loss") | .value | keys | ma 'data["train/loss"]["50"] < 0.38' \ 'max(data["ray/node.0.gpu.0.mem_gb"]) < 70' \ 'mean(data["timing/train/total_step_time"], 2) < 32' -fi +fi diff --git a/tests/test_suites/llm/sft-llama3.1-8b-1n8g-megatron-seqpack.sh b/tests/test_suites/llm/sft-llama3.1-8b-1n8g-megatron-seqpack.sh index fe54af1fbd..e063b39861 100755 --- a/tests/test_suites/llm/sft-llama3.1-8b-1n8g-megatron-seqpack.sh +++ b/tests/test_suites/llm/sft-llama3.1-8b-1n8g-megatron-seqpack.sh @@ -36,4 +36,4 @@ if [[ $(jq 'to_entries | .[] | select(.key == "train/loss") | .value | keys | ma 'data["train/loss"]["1"] < 0.6' \ 'data["train/loss"]["250"] < 0.36' \ 'mean(data["timing/train/total_step_time"], 2) < 6' -fi +fi diff --git a/tests/test_suites/llm/sft-llama3.1-8b-1n8g-megatron.sh b/tests/test_suites/llm/sft-llama3.1-8b-1n8g-megatron.sh index bc5eae73a2..8ef0dfafe6 100755 --- a/tests/test_suites/llm/sft-llama3.1-8b-1n8g-megatron.sh +++ b/tests/test_suites/llm/sft-llama3.1-8b-1n8g-megatron.sh @@ -36,4 +36,4 @@ if [[ $(jq 'to_entries | .[] | select(.key == "train/loss") | .value | keys | ma 'data["train/loss"]["1"] < 0.6' \ 'data["train/loss"]["250"] < 0.36' \ 'mean(data["timing/train/total_step_time"], 2) < 20' -fi +fi diff --git a/tests/test_suites/llm/sft-llama3.2-1b-1n8g-fsdp2tp1.v3.sh b/tests/test_suites/llm/sft-llama3.2-1b-1n8g-fsdp2tp1.v3.sh index a4b44bd1f1..05305ab3f7 100755 --- a/tests/test_suites/llm/sft-llama3.2-1b-1n8g-fsdp2tp1.v3.sh +++ b/tests/test_suites/llm/sft-llama3.2-1b-1n8g-fsdp2tp1.v3.sh @@ -39,4 +39,3 @@ if [[ $(jq 'to_entries | .[] | select(.key == "train/loss") | .value | keys | ma 'max(data["ray/node.0.gpu.0.mem_gb"]) < 25' \ 'mean(data["timing/train/total_step_time"], -6, -1) < 0.6' fi - diff --git a/tests/test_suites/llm/sft-qwen2.5-32b-4n8g-fsdp2tp8sp-actckpt.v3.sh b/tests/test_suites/llm/sft-qwen2.5-32b-4n8g-fsdp2tp8sp-actckpt.v3.sh index d16a3d8d98..89edc6613e 100755 --- a/tests/test_suites/llm/sft-qwen2.5-32b-4n8g-fsdp2tp8sp-actckpt.v3.sh +++ b/tests/test_suites/llm/sft-qwen2.5-32b-4n8g-fsdp2tp8sp-actckpt.v3.sh @@ -40,4 +40,4 @@ if [[ $(jq 'to_entries | .[] | select(.key == "train/loss") | .value | keys | ma 'data["train/loss"]["1"] < 0.37' \ 'data["train/loss"]["20"] < 0.3' \ 'max(data["ray/node.0.gpu.0.mem_gb"]) < 35' -fi +fi diff --git a/tests/test_suites/nightly.txt b/tests/test_suites/nightly.txt index e6611fa8b5..f3841fef04 100644 --- a/tests/test_suites/nightly.txt +++ b/tests/test_suites/nightly.txt @@ -37,6 +37,9 @@ tests/test_suites/llm/grpo-math-qwen3-30ba3b-megatron-tp4-32k.sh # FP8 tests/test_suites/llm/grpo-llama3.1-8b-instruct-1n8g-megatron-fp8.sh +# Non-colocated +tests/test_suites/llm/grpo-llama3.1-8b-instruct-2n8g-fsdp2tp1-noncolocated.sh + ####### # SFT # #######