verl-project · ETOgaosion · May 16, 2025 · May 10, 2025 · May 10, 2025 · May 10, 2025
diff --git a/.github/workflows/e2e_ppo_trainer.yml b/.github/workflows/e2e_ppo_trainer.yml
@@ -83,18 +83,26 @@ jobs:
           ray stop --force
           python3 examples/data_preprocess/gsm8k.py
       # Function RM
-      - name: Running GSM8K E2E training tests on 8 L20 GPUs with rmpad using function rm with validation and saving
+      - name: Running GSM8K E2E training tests on 8 L20 GPUs with rmpad using function rm with validation and saving (FSDP_SIZE=8)
         run: |
           ray stop --force
-          VAL_BEFORE_TRAIN=True TEST_FREQ=1 SAVE_FREQ=1 SAVE_HF_MODEL=True bash tests/e2e/ppo_trainer/run_function_reward.sh
+          VAL_BEFORE_TRAIN=True TEST_FREQ=1 SAVE_FREQ=1 SAVE_HF_MODEL=True VERL_EXP_NAME="qwen2.5-0.5b-function-reward-minimal-fsdp8" bash tests/e2e/ppo_trainer/run_function_reward.sh
       - name: Running GSM8K E2E training tests on 8 L20 GPUs with rmpad using function rm after resuming
         run: |
           ray stop --force
-          RESUME_MODE=auto bash tests/e2e/ppo_trainer/run_function_reward.sh
-      - name: Test FSDP checkpoints merging function (Qwen Actor) 
+          RESUME_MODE=auto VERL_EXP_NAME="qwen2.5-0.5b-function-reward-minimal-fsdp8" bash tests/e2e/ppo_trainer/run_function_reward.sh
+      - name: Test merging FSDP checkpoints (Qwen Actor)
         run: |
-          exp_name="qwen2.5-0.5b-function-reward-minimal"
-          python scripts/model_merger.py --backend fsdp --hf_model_path ~/models/Qwen/Qwen2.5-0.5B --local_dir checkpoints/verl-test/${exp_name}/global_step_1/actor --test --test_hf_dir checkpoints/verl-test/${exp_name}/global_step_1/actor/huggingface
+          exp_name="qwen2.5-0.5b-function-reward-minimal-fsdp8"
+          python scripts/model_merger.py test --backend fsdp --local_dir checkpoints/verl-test/${exp_name}/global_step_1/actor --test_hf_dir checkpoints/verl-test/${exp_name}/global_step_1/actor/huggingface
+      - name: Running GSM8K E2E training tests on 8 L20 GPUs with rmpad using function rm with validation and saving (DDP_SIZE=2, FSDP_SIZE=4)
+        run: |
+          ray stop --force
+          VAL_BEFORE_TRAIN=True TEST_FREQ=1 SAVE_FREQ=1 SAVE_HF_MODEL=True FSDP_SIZE=4 VERL_EXP_NAME="qwen2.5-0.5b-function-reward-minimal-ddp2-fsdp4" bash tests/e2e/ppo_trainer/run_function_reward.sh
+      - name: Test merging DDP+FSDP checkpoints (Qwen Actor)
+        run: |
+          exp_name="qwen2.5-0.5b-function-reward-minimal-ddp2-fsdp4"
+          python scripts/model_merger.py test --backend fsdp --local_dir checkpoints/verl-test/${exp_name}/global_step_1/actor --test_hf_dir checkpoints/verl-test/${exp_name}/global_step_1/actor/huggingface
       - name: Running GSM8K E2E without rmpad using function rm
         run: |
           ray stop --force

diff --git a/.github/workflows/e2e_ppo_trainer_megatron.yml b/.github/workflows/e2e_ppo_trainer_megatron.yml
@@ -73,8 +73,8 @@ jobs:
       - name: Test Megatron checkpoints merging function (Qwen Actor and Critic)
         run: |
           exp_name="qwen2.5-0.5b-megatron-gsm8k-minimal"
-          python scripts/model_merger.py --backend megatron --tie-word-embedding --hf_model_path Qwen/Qwen2.5-0.5B --local_dir checkpoints/verl-test/${exp_name}/global_step_1/actor --test --test_hf_dir checkpoints/verl-test/${exp_name}/global_step_1/actor/huggingface
-          python scripts/model_merger.py --backend megatron --is-value-model --hf_model_path Qwen/Qwen2.5-0.5B --local_dir checkpoints/verl-test/${exp_name}/global_step_1/critic --test --test_hf_dir checkpoints/verl-test/${exp_name}/global_step_1/critic/huggingface
+          python scripts/model_merger.py test --backend megatron --tie-word-embedding --local_dir checkpoints/verl-test/${exp_name}/global_step_1/actor --test_hf_dir checkpoints/verl-test/${exp_name}/global_step_1/actor/huggingface --hf_model_path Qwen/Qwen2.5-0.5B
+          python scripts/model_merger.py test --backend megatron --is-value-model --local_dir checkpoints/verl-test/${exp_name}/global_step_1/critic --test_hf_dir checkpoints/verl-test/${exp_name}/global_step_1/critic/huggingface --hf_model_path Qwen/Qwen2.5-0.5B
       - name: Running GRPO GSM8K E2E training tests with 3D parallelism on 8 L20 GPUs with Megatron (Qwen)
         run: |
           ray stop --force
@@ -119,8 +119,8 @@ jobs:
       - name: Test Megatron checkpoints merging function (DeepSeek Actor and Critic)
         run: |
           exp_name="deepseek-coder-1.3b-instruct-megatron-gsm8k-minimal"
-          python scripts/model_merger.py --backend megatron --hf_model_path deepseek-ai/deepseek-coder-1.3b-instruct --local_dir checkpoints/verl-test/${exp_name}/global_step_1/actor --test --test_hf_dir checkpoints/verl-test/${exp_name}/global_step_1/actor/huggingface
-          python scripts/model_merger.py --backend megatron --is-value-model --hf_model_path deepseek-ai/deepseek-coder-1.3b-instruct --local_dir checkpoints/verl-test/${exp_name}/global_step_1/critic --test --test_hf_dir checkpoints/verl-test/${exp_name}/global_step_1/critic/huggingface
+          python scripts/model_merger.py test --backend megatron --local_dir checkpoints/verl-test/${exp_name}/global_step_1/actor --test_hf_dir checkpoints/verl-test/${exp_name}/global_step_1/actor/huggingface --hf_model_path deepseek-ai/deepseek-coder-1.3b-instruct
+          python scripts/model_merger.py test --backend megatron --is-value-model --local_dir checkpoints/verl-test/${exp_name}/global_step_1/critic --test_hf_dir checkpoints/verl-test/${exp_name}/global_step_1/critic/huggingface --hf_model_path deepseek-ai/deepseek-coder-1.3b-instruct
       - name: clean up
         run: |
           rm -rf checkpoints
@@ -256,8 +256,8 @@ jobs:
       - name: Test Megatron checkpoints merging function (Qwen3 Actor and Critic)
         run: |
           exp_name="qwen3-0.6b-megatron-gsm8k-minimal"
-          python scripts/model_merger.py --backend megatron --tie-word-embedding --hf_model_path Qwen/Qwen3-0.6B --local_dir checkpoints/verl-test/${exp_name}/global_step_1/actor --test --test_hf_dir checkpoints/verl-test/${exp_name}/global_step_1/actor/huggingface
-          python scripts/model_merger.py --backend megatron --is-value-model --hf_model_path Qwen/Qwen3-0.6B --local_dir checkpoints/verl-test/${exp_name}/global_step_1/critic --test --test_hf_dir checkpoints/verl-test/${exp_name}/global_step_1/critic/huggingface
+          python scripts/model_merger.py test --backend megatron --tie-word-embedding --hf_model_path Qwen/Qwen3-0.6B --local_dir checkpoints/verl-test/${exp_name}/global_step_1/actor --test_hf_dir checkpoints/verl-test/${exp_name}/global_step_1/actor/huggingface
+          python scripts/model_merger.py test --backend megatron --is-value-model --hf_model_path Qwen/Qwen3-0.6B --local_dir checkpoints/verl-test/${exp_name}/global_step_1/critic --test_hf_dir checkpoints/verl-test/${exp_name}/global_step_1/critic/huggingface
       - name: Running GRPO GSM8K E2E training tests with 3D parallelism on 8 L20 GPUs with Megatron (Qwen3)
         run: |
           ray stop --force

@@ -67,27 +67,50 @@ Convert FSDP and Megatron Checkpoints to HuggingFace Format Model
 We provide a tool to convert the FSDP and Megatron checkpoints to HuggingFace format model.
 The tool is located in ``scripts/model_merger.py``.
 
-The arguments are as follows:
+The script supports two main sub-commands: ``merge`` (to convert and save checkpoints) and ``test`` (to validate merged checkpoints against a reference model).
+The arguments for the ``merge`` sub-command are as follows:
 
 .. code:: bash
 
-    usage: model_merger.py [-h] [--backend {fsdp,megatron}]
-                           [--tie-word-embedding whether the model share embedding weights]
-                           [--is-value-model whether the model is critic model]
-                           [--hf_model_path $original_model_path, like {Qwen/Qwen2-7B}]
-                           [--local_dir $local_directory saved fsdp or megatron models]
-                           [--target_dir $target_dir to save converted models, default is tmp]
-                           [--hf_upload_path $huggingface_repo to upload]
-
-So example use of Megatron model merger is:
+    usage: model_merger.py merge [-h] --backend {fsdp,megatron} --local_dir LOCAL_DIR [--hf_model_path HF_MODEL_PATH]
+                                [--tie-word-embedding] [--is-value-model] [--target_dir TARGET_DIR]
+                                [--hf_upload_path HF_UPLOAD_PATH] [--private]
+
+    options:
+    -h, --help            show this help message and exit
+    --backend {fsdp,megatron}
+                            The backend of the model
+    --local_dir LOCAL_DIR
+                            Path to the saved model checkpoints
+    --hf_model_path HF_MODEL_PATH
+                            (Deprecated) Path to the original Hugging Face model for config.
+    --tie-word-embedding  Whether to tie word embedding weights (currently only Megatron supported)
+    --is-value-model      Whether the model is a value model (currently only Megatron supported)
+    --target_dir TARGET_DIR
+                            Directory to save the merged huggingface model
+    --hf_upload_path HF_UPLOAD_PATH
+                            Hugging Face repository ID to upload the model
+    --private             Whether to upload the model to a private Hugging Face repository
+
+Example usage for merging Megatron checkpoints:
 
 .. code:: bash
 
-    python scripts/model_merger.py \
+    python scripts/model_merger.py merge \
         --backend megatron \
         --tie-word-embedding \
-        --hf_model_path Qwen/Qwen2.5-0.5B \
-        --local_dir checkpoints/verl_megatron_gsm8k_examples/qwen2_5_0b5_megatron_saveload/global_step_1/actor
+        --local_dir checkpoints/verl_megatron_gsm8k_examples/qwen2_5_0b5_megatron_saveload/global_step_1/actor \
+        --target_dir /path/to/merged_hf_model
+
+Example usage for merging FSDP checkpoints:
+
+.. code:: bash
+
+    python scripts/model_merger.py merge \
+        --backend fsdp \
+        --local_dir checkpoints/verl_fsdp_gsm8k_examples/qwen2_5_0b5_fsdp_saveload/global_step_1/actor \
+        --target_dir /path/to/merged_hf_model
+
 
 Megatron Merger details
 -----------------------