Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
35 changes: 3 additions & 32 deletions .github/workflows/e2e_ppo_trainer_megatron.yml
Original file line number Diff line number Diff line change
Expand Up @@ -65,7 +65,7 @@ jobs:
- name: Running GSM8K E2E training tests with 3D parallelism on 8 L20 GPUs with Megatron (Qwen) with validation and saving
run: |
ray stop --force
VAL_BEFORE_TRAIN=True TEST_FREQ=1 SAVE_FREQ=1 bash tests/e2e/run_ppo_trainer_megatron.sh
ALL_OFFLOAD=True VAL_BEFORE_TRAIN=True TEST_FREQ=1 SAVE_FREQ=1 bash tests/e2e/run_ppo_trainer_megatron.sh
- name: Running GSM8K E2E training tests with 3D parallelism on 8 L20 GPUs with Megatron (Qwen) after resuming
run: |
ray stop --force
Expand Down Expand Up @@ -107,7 +107,7 @@ jobs:
- name: Running GSM8K E2E training tests with 3D parallelism on 8 L20 GPUs with Megatron (DeepSeek)
run: |
ray stop --force
SAVE_FREQ=1 MODEL_ID=deepseek-ai/deepseek-coder-1.3b-instruct bash tests/e2e/run_ppo_trainer_megatron.sh
ALL_OFFLOAD=True SAVE_FREQ=1 MODEL_ID=deepseek-ai/deepseek-coder-1.3b-instruct bash tests/e2e/run_ppo_trainer_megatron.sh
- name: Running GSM8K E2E training tests with 3D parallelism on 8 L20 GPUs with Megatron (DeepSeek)
run: |
ray stop --force
Expand Down Expand Up @@ -149,7 +149,7 @@ jobs:
- name: Running GSM8K E2E training tests with 3D parallelism on 8 L20 GPUs with Megatron (Qwen3) with validation and saving
run: |
ray stop --force
VAL_BEFORE_TRAIN=True TEST_FREQ=1 SAVE_FREQ=1 MODEL_ID=Qwen/Qwen3-0.6B bash tests/e2e/run_ppo_trainer_megatron.sh
ALL_OFFLOAD=True VAL_BEFORE_TRAIN=True TEST_FREQ=1 SAVE_FREQ=1 MODEL_ID=Qwen/Qwen3-0.6B bash tests/e2e/run_ppo_trainer_megatron.sh
- name: Running GSM8K E2E training tests with 3D parallelism on 8 L20 GPUs with Megatron (Qwen3) after resuming
run: |
ray stop --force
Expand Down Expand Up @@ -305,34 +305,5 @@ jobs:
- name: clean up
run: |
rm -rf checkpoints
e2e_ppo_trainer_megatron-offload-qwen3:
runs-on: [L20x8]
timeout-minutes: 30 # Increase this timeout value as needed
env:
HTTP_PROXY: ${{ secrets.PROXY_HTTP }}
HTTPS_PROXY: ${{ secrets.PROXY_HTTPS }}
NO_PROXY: "localhost,127.0.0.1,hf-mirror.com"
HF_ENDPOINT: "https://hf-mirror.com"
HF_HUB_ENABLE_HF_TRANSFER: "0" # This is more stable
container:
image: whatcanyousee/verl:ngc-cu124-vllm0.8.5-sglang0.4.6-mcore0.12.0-te2.3
options: --gpus all --shm-size=10g
steps:
- uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
with:
fetch-depth: 0
- name: Install the current repository
run: |
pip3 install --no-deps -e .[test]
- name: Prepare GSM8K dataset
run: |
python3 examples/data_preprocess/gsm8k.py
- name: Running GRPO GSM8K E2E training tests with 3D parallelism on 8 L20 GPUs with Megatron and Offload
run: |
ray stop --force
COMMON_PARAM_OFFLOAD=True COMMON_GRAD_OFFLOAD=True COMMON_OPTIMIZER_OFFLOAD=True ADV_ESTIMATOR=grpo MODEL_ID=Qwen/Qwen3-0.6B bash tests/e2e/run_ppo_trainer_megatron.sh
- name: clean up
run: |
rm -rf checkpoints


11 changes: 4 additions & 7 deletions tests/e2e/run_ppo_trainer_megatron.sh
Original file line number Diff line number Diff line change
Expand Up @@ -55,9 +55,10 @@ RM_VPP=${RM_VPP:-$COMMON_VPP}
RM_CP=${RM_CP:-$COMMON_CP}
RM_TP=${RM_TP:-$TRAIN_TP}

COMMON_PARAM_OFFLOAD=${COMMON_PARAM_OFFLOAD:-False}
COMMON_GRAD_OFFLOAD=${COMMON_GRAD_OFFLOAD:-False}
COMMON_OPTIMIZER_OFFLOAD=${COMMON_OPTIMIZER_OFFLOAD:-False}
ALL_OFFLOAD=${ALL_OFFLOAD:-False}
COMMON_PARAM_OFFLOAD=${COMMON_PARAM_OFFLOAD:-$ALL_OFFLOAD}
COMMON_GRAD_OFFLOAD=${COMMON_GRAD_OFFLOAD:-$ALL_OFFLOAD}
COMMON_OPTIMIZER_OFFLOAD=${COMMON_OPTIMIZER_OFFLOAD:-$ALL_OFFLOAD}

ACTOR_PARAM_OFFLOAD=${ACTOR_PARAM_OFFLOAD:-$COMMON_PARAM_OFFLOAD}
ACTOR_GRAD_OFFLOAD=${ACTOR_GRAD_OFFLOAD:-$COMMON_GRAD_OFFLOAD}
Expand All @@ -66,9 +67,7 @@ REF_PARAM_OFFLOAD=${REF_PARAM_OFFLOAD:-$COMMON_PARAM_OFFLOAD}
CRITIC_PARAM_OFFLOAD=${CRITIC_PARAM_OFFLOAD:-$COMMON_PARAM_OFFLOAD}
CRITIC_GRAD_OFFLOAD=${CRITIC_GRAD_OFFLOAD:-$COMMON_GRAD_OFFLOAD}
CRITIC_OPTIMIZER_OFFLOAD=${CRITIC_OPTIMIZER_OFFLOAD:-$COMMON_OPTIMIZER_OFFLOAD}
RM_GRAD_OFFLOAD=${RM_GRAD_OFFLOAD:-$COMMON_GRAD_OFFLOAD}
RM_PARAM_OFFLOAD=${RM_PARAM_OFFLOAD:-$COMMON_PARAM_OFFLOAD}
RM_OPTIMIZER_OFFLOAD=${RM_OPTIMIZER_OFFLOAD:-$COMMON_OPTIMIZER_OFFLOAD}

CHECKPOINT_CONTENTS=['model','hf_model','optimizer','extra']
SKIP_SAVE_HF_MODEL=${SKIP_SAVE_HF_MODEL:-0}
Expand Down Expand Up @@ -134,8 +133,6 @@ python3 -m verl.trainer.main_ppo --config-path=config \
reward_model.megatron.context_parallel_size=$RM_CP \
reward_model.megatron.tensor_model_parallel_size=$RM_TP \
reward_model.megatron.param_offload=${RM_PARAM_OFFLOAD} \
reward_model.megatron.optimizer_offload=${RM_OPTIMIZER_OFFLOAD} \
reward_model.megatron.grad_offload=${RM_GRAD_OFFLOAD} \
algorithm.use_kl_in_reward=False \
algorithm.kl_penalty=kl \
algorithm.kl_ctrl.kl_coef=0.001 \
Expand Down
2 changes: 0 additions & 2 deletions verl/trainer/config/ppo_megatron_trainer.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -227,8 +227,6 @@ reward_model:
strategy: megatron
megatron:
param_offload: False
grad_offload: False
optimizer_offload: False
tensor_model_parallel_size: 1
expert_model_parallel_size: 1
expert_tensor_parallel_size: null
Expand Down