diff --git a/examples/grpo_trainer/run_qwen3moe-30b_megatron_96gb.sh b/examples/grpo_trainer/run_qwen3moe-30b_megatron_96gb.sh
index 7bf06e3ada5..6937db5fcfa 100644
--- a/examples/grpo_trainer/run_qwen3moe-30b_megatron_96gb.sh
+++ b/examples/grpo_trainer/run_qwen3moe-30b_megatron_96gb.sh
@@ -168,7 +168,7 @@ python3 -m verl.trainer.main_ppo --config-path=./config --config-name='ppo_megat
     actor_rollout_ref.rollout.free_cache_engine=True \
     actor_rollout_ref.ref.log_prob_micro_batch_size_per_gpu=${infer_ppo_micro_batch_size_per_gpu} \
     actor_rollout_ref.ref.log_prob_max_token_len_per_gpu=${infer_ppo_max_token_len} \
-    actor_rollout_ref.ref.megatron.use_dist_checkpointing=True \
+    actor_rollout_ref.ref.megatron.use_dist_checkpointing=${USE_DIST_CKPT} \
     actor_rollout_ref.ref.megatron.param_offload=${offload} \
     actor_rollout_ref.ref.megatron.tensor_model_parallel_size=${REF_TP} \
     actor_rollout_ref.ref.megatron.pipeline_model_parallel_size=${REF_PP} \
@@ -192,4 +192,4 @@ python3 -m verl.trainer.main_ppo --config-path=./config --config-name='ppo_megat
     trainer.save_freq=100 \
     trainer.total_epochs=10 \
     trainer.resume_mode=auto \
-    trainer.log_val_generations=10
\ No newline at end of file
+    trainer.log_val_generations=10
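
Note: with this change the ref worker's use_dist_checkpointing flag follows the USE_DIST_CKPT shell variable instead of being hard-coded to True, matching the other parameterized Megatron options in the same block (REF_TP, REF_PP, offload). The diff does not show where USE_DIST_CKPT is set; the lines below are a minimal sketch of how it could be defaulted near the top of the script, assuming it is not already defined there, with True chosen as the default purely to preserve the previous hard-coded behaviour:

    # Assumed default for USE_DIST_CKPT (not part of the diff above):
    # keep distributed checkpointing enabled unless the caller overrides it,
    # e.g. USE_DIST_CKPT=False bash run_qwen3moe-30b_megatron_96gb.sh
    USE_DIST_CKPT=${USE_DIST_CKPT:-True}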