diff --git a/recipe/one_step_off_policy/grpo_qwen3_8b_gsm8k_fsdp2_8_8_npu.sh b/recipe/one_step_off_policy/grpo_qwen3_8b_gsm8k_fsdp2_8_8_npu.sh index 6a39b87fead..109a685f64d 100644 --- a/recipe/one_step_off_policy/grpo_qwen3_8b_gsm8k_fsdp2_8_8_npu.sh +++ b/recipe/one_step_off_policy/grpo_qwen3_8b_gsm8k_fsdp2_8_8_npu.sh @@ -60,6 +60,7 @@ python3 -m recipe.one_step_off_policy.main_ppo \ actor_rollout_ref.rollout.log_prob_micro_batch_size_per_gpu=1 \ actor_rollout_ref.rollout.tensor_model_parallel_size=4 \ actor_rollout_ref.rollout.name=vllm \ + actor_rollout_ref.rollout.mode=sync \ actor_rollout_ref.rollout.gpu_memory_utilization=0.8 \ actor_rollout_ref.rollout.max_num_batched_tokens=$((max_prompt_length + max_response_length)) \ actor_rollout_ref.rollout.n=8 \