diff --git a/.github/workflows/e2e_ppo_trainer_megatron.yml b/.github/workflows/e2e_ppo_trainer_megatron.yml index d2fd6c0f252..b932657e699 100644 --- a/.github/workflows/e2e_ppo_trainer_megatron.yml +++ b/.github/workflows/e2e_ppo_trainer_megatron.yml @@ -65,7 +65,7 @@ jobs: - name: Running GSM8K E2E training tests with 3D parallelism on 8 L20 GPUs with Megatron (Qwen) with validation and saving run: | ray stop --force - VAL_BEFORE_TRAIN=True TEST_FREQ=1 SAVE_FREQ=1 bash tests/e2e/run_ppo_trainer_megatron.sh + ALL_OFFLOAD=True VAL_BEFORE_TRAIN=True TEST_FREQ=1 SAVE_FREQ=1 bash tests/e2e/run_ppo_trainer_megatron.sh - name: Running GSM8K E2E training tests with 3D parallelism on 8 L20 GPUs with Megatron (Qwen) after resuming run: | ray stop --force @@ -107,7 +107,7 @@ jobs: - name: Running GSM8K E2E training tests with 3D parallelism on 8 L20 GPUs with Megatron (DeepSeek) run: | ray stop --force - SAVE_FREQ=1 MODEL_ID=deepseek-ai/deepseek-coder-1.3b-instruct bash tests/e2e/run_ppo_trainer_megatron.sh + ALL_OFFLOAD=True SAVE_FREQ=1 MODEL_ID=deepseek-ai/deepseek-coder-1.3b-instruct bash tests/e2e/run_ppo_trainer_megatron.sh - name: Running GSM8K E2E training tests with 3D parallelism on 8 L20 GPUs with Megatron (DeepSeek) run: | ray stop --force @@ -149,7 +149,7 @@ jobs: - name: Running GSM8K E2E training tests with 3D parallelism on 8 L20 GPUs with Megatron (Qwen3) with validation and saving run: | ray stop --force - VAL_BEFORE_TRAIN=True TEST_FREQ=1 SAVE_FREQ=1 MODEL_ID=Qwen/Qwen3-0.6B bash tests/e2e/run_ppo_trainer_megatron.sh + ALL_OFFLOAD=True VAL_BEFORE_TRAIN=True TEST_FREQ=1 SAVE_FREQ=1 MODEL_ID=Qwen/Qwen3-0.6B bash tests/e2e/run_ppo_trainer_megatron.sh - name: Running GSM8K E2E training tests with 3D parallelism on 8 L20 GPUs with Megatron (Qwen3) after resuming run: | ray stop --force @@ -305,34 +305,5 @@ jobs: - name: clean up run: | rm -rf checkpoints - e2e_ppo_trainer_megatron-offload-qwen3: - runs-on: [L20x8] - timeout-minutes: 30 # Increase this timeout value as needed - env: - HTTP_PROXY: ${{ secrets.PROXY_HTTP }} - HTTPS_PROXY: ${{ secrets.PROXY_HTTPS }} - NO_PROXY: "localhost,127.0.0.1,hf-mirror.com" - HF_ENDPOINT: "https://hf-mirror.com" - HF_HUB_ENABLE_HF_TRANSFER: "0" # This is more stable - container: - image: whatcanyousee/verl:ngc-cu124-vllm0.8.5-sglang0.4.6-mcore0.12.0-te2.3 - options: --gpus all --shm-size=10g - steps: - - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 - with: - fetch-depth: 0 - - name: Install the current repository - run: | - pip3 install --no-deps -e .[test] - - name: Prepare GSM8K dataset - run: | - python3 examples/data_preprocess/gsm8k.py - - name: Running GRPO GSM8K E2E training tests with 3D parallelism on 8 L20 GPUs with Megatron and Offload - run: | - ray stop --force - COMMON_PARAM_OFFLOAD=True COMMON_GRAD_OFFLOAD=True COMMON_OPTIMIZER_OFFLOAD=True ADV_ESTIMATOR=grpo MODEL_ID=Qwen/Qwen3-0.6B bash tests/e2e/run_ppo_trainer_megatron.sh - - name: clean up - run: | - rm -rf checkpoints diff --git a/tests/e2e/run_ppo_trainer_megatron.sh b/tests/e2e/run_ppo_trainer_megatron.sh index 7ca8d87b4ea..82b0582c3da 100644 --- a/tests/e2e/run_ppo_trainer_megatron.sh +++ b/tests/e2e/run_ppo_trainer_megatron.sh @@ -55,9 +55,10 @@ RM_VPP=${RM_VPP:-$COMMON_VPP} RM_CP=${RM_CP:-$COMMON_CP} RM_TP=${RM_TP:-$TRAIN_TP} -COMMON_PARAM_OFFLOAD=${COMMON_PARAM_OFFLOAD:-False} -COMMON_GRAD_OFFLOAD=${COMMON_GRAD_OFFLOAD:-False} -COMMON_OPTIMIZER_OFFLOAD=${COMMON_OPTIMIZER_OFFLOAD:-False} +ALL_OFFLOAD=${ALL_OFFLOAD:-False} +COMMON_PARAM_OFFLOAD=${COMMON_PARAM_OFFLOAD:-$ALL_OFFLOAD} +COMMON_GRAD_OFFLOAD=${COMMON_GRAD_OFFLOAD:-$ALL_OFFLOAD} +COMMON_OPTIMIZER_OFFLOAD=${COMMON_OPTIMIZER_OFFLOAD:-$ALL_OFFLOAD} ACTOR_PARAM_OFFLOAD=${ACTOR_PARAM_OFFLOAD:-$COMMON_PARAM_OFFLOAD} ACTOR_GRAD_OFFLOAD=${ACTOR_GRAD_OFFLOAD:-$COMMON_GRAD_OFFLOAD} @@ -66,9 +67,7 @@ REF_PARAM_OFFLOAD=${REF_PARAM_OFFLOAD:-$COMMON_PARAM_OFFLOAD} CRITIC_PARAM_OFFLOAD=${CRITIC_PARAM_OFFLOAD:-$COMMON_PARAM_OFFLOAD} CRITIC_GRAD_OFFLOAD=${CRITIC_GRAD_OFFLOAD:-$COMMON_GRAD_OFFLOAD} CRITIC_OPTIMIZER_OFFLOAD=${CRITIC_OPTIMIZER_OFFLOAD:-$COMMON_OPTIMIZER_OFFLOAD} -RM_GRAD_OFFLOAD=${RM_GRAD_OFFLOAD:-$COMMON_GRAD_OFFLOAD} RM_PARAM_OFFLOAD=${RM_PARAM_OFFLOAD:-$COMMON_PARAM_OFFLOAD} -RM_OPTIMIZER_OFFLOAD=${RM_OPTIMIZER_OFFLOAD:-$COMMON_OPTIMIZER_OFFLOAD} CHECKPOINT_CONTENTS=['model','hf_model','optimizer','extra'] SKIP_SAVE_HF_MODEL=${SKIP_SAVE_HF_MODEL:-0} @@ -134,8 +133,6 @@ python3 -m verl.trainer.main_ppo --config-path=config \ reward_model.megatron.context_parallel_size=$RM_CP \ reward_model.megatron.tensor_model_parallel_size=$RM_TP \ reward_model.megatron.param_offload=${RM_PARAM_OFFLOAD} \ - reward_model.megatron.optimizer_offload=${RM_OPTIMIZER_OFFLOAD} \ - reward_model.megatron.grad_offload=${RM_GRAD_OFFLOAD} \ algorithm.use_kl_in_reward=False \ algorithm.kl_penalty=kl \ algorithm.kl_ctrl.kl_coef=0.001 \ diff --git a/verl/trainer/config/ppo_megatron_trainer.yaml b/verl/trainer/config/ppo_megatron_trainer.yaml index 2b1a0941594..636e287b8b7 100644 --- a/verl/trainer/config/ppo_megatron_trainer.yaml +++ b/verl/trainer/config/ppo_megatron_trainer.yaml @@ -227,8 +227,6 @@ reward_model: strategy: megatron megatron: param_offload: False - grad_offload: False - optimizer_offload: False tensor_model_parallel_size: 1 expert_model_parallel_size: 1 expert_tensor_parallel_size: null