diff --git a/examples/configs/grpo_math_70B_megatron.yaml b/examples/configs/grpo_math_70B_megatron.yaml
index 1aaad35659..1317e45a04 100644
--- a/examples/configs/grpo_math_70B_megatron.yaml
+++ b/examples/configs/grpo_math_70B_megatron.yaml
@@ -62,7 +62,7 @@ policy:
     stop_strings: null
     vllm_cfg:
       tensor_parallel_size: 4
-      gpu_memory_utilization: 0.8
+      gpu_memory_utilization: 0.6
       max_model_len: ${policy.max_total_sequence_length}
 
 cluster:
diff --git a/examples/configs/grpo_math_8B_megatron.yaml b/examples/configs/grpo_math_8B_megatron.yaml
index fc839c8239..62ec41359c 100644
--- a/examples/configs/grpo_math_8B_megatron.yaml
+++ b/examples/configs/grpo_math_8B_megatron.yaml
@@ -67,7 +67,7 @@ policy:
     stop_strings: null
     vllm_cfg:
       tensor_parallel_size: 1
-      gpu_memory_utilization: 0.8
+      gpu_memory_utilization: 0.6
       max_model_len: ${policy.max_total_sequence_length}
 
 cluster:
diff --git a/examples/configs/grpo_math_qwen30ba3b_megatron.yaml b/examples/configs/grpo_math_qwen30ba3b_megatron.yaml
index 8ebd93e7a1..84d6736cec 100644
--- a/examples/configs/grpo_math_qwen30ba3b_megatron.yaml
+++ b/examples/configs/grpo_math_qwen30ba3b_megatron.yaml
@@ -29,11 +29,11 @@ policy:
     enabled: true
     empty_unused_memory_level: 1
     converter_type: "LlamaForCausalLM"
-    tensor_model_parallel_size: 4
-    pipeline_model_parallel_size: 4
+    tensor_model_parallel_size: 2
+    pipeline_model_parallel_size: 1
     context_parallel_size: 1
     expert_tensor_parallel_size: 1
-    expert_model_parallel_size: 4
+    expert_model_parallel_size: 8
     sequence_parallel: True
 
     pipeline_dtype: ${policy.precision}
@@ -68,7 +68,8 @@ policy:
     stop_strings: null
     vllm_cfg:
       tensor_parallel_size: 4
-      gpu_memory_utilization: 0.8
+      gpu_memory_utilization: 0.7
+      enforce_eager: false
       max_model_len: ${policy.max_total_sequence_length}
 
 cluster: