Merged

Commits (120)
d7a70a5
Add new designed data model for tool and async rollout
SwordFaith Apr 2, 2025
dc578c1
add AsyncRolloutRequest and message-ids convert tools
zyzshishui Apr 4, 2025
0edc910
Add unittest to async sgl engine and related ci
SwordFaith Apr 4, 2025
59df7ab
Fix test case and add verl engine with async
SwordFaith Apr 7, 2025
63caefe
Fix AsyncVerlEngine naming
SwordFaith Apr 7, 2025
2eea830
Dump test async sgl rollout checkpoint
SwordFaith Apr 7, 2025
4199cf8
Fix async rollout with memory saver issue
SwordFaith Apr 8, 2025
ed46763
Tmp dump for pass messages by raw_prompt key in non_tensor_batch
SwordFaith Apr 8, 2025
bc4fe76
Add example gsm8k tool and fix schema issue
SwordFaith Apr 8, 2025
d2522f0
Tmp dump for add prompt to async rollout request and async rollout st…
SwordFaith Apr 9, 2025
e0b3bbb
Wait to add more test for async gen with tools and verify training se…
SwordFaith Apr 10, 2025
4532d33
Add support for tool_calls formating and parse error
SwordFaith Apr 10, 2025
c415ad4
support loss_mask and loading tool from config
zyzshishui Apr 12, 2025
6a91862
fix review
zyzshishui Apr 12, 2025
c3831f2
fix review
zyzshishui Apr 12, 2025
7c063a9
Fix get_tool_call_parser_type bot/eot check logic
SwordFaith Apr 13, 2025
3cbf99b
Convert tool method and fn call to all async
SwordFaith Apr 13, 2025
c730d48
merge and add rst file for multiturn
zyzshishui Apr 13, 2025
983c28e
fixed tool initialization
zyzshishui Apr 13, 2025
2cb290c
Add unittest with sharding manager and rollout, and fix async generat…
SwordFaith Apr 13, 2025
8d18e43
wait to support batch rollout
zyzshishui Apr 13, 2025
a88ddda
add max_turn
zyzshishui Apr 14, 2025
bdfd998
add e2e test for tool calling
zyzshishui Apr 14, 2025
26ade88
fix batch rollout
zyzshishui Apr 14, 2025
c437093
Merge branch 'feat/add_async_sglang_multi_turn_support' into feat/los…
SwordFaith Apr 14, 2025
8e7b00b
Merge pull request #2 from zyzshishui/feat/loss_mask_and_tool_config
SwordFaith Apr 14, 2025
fd3b44c
Dump first runable version
SwordFaith Apr 14, 2025
1ff236c
Fix config missing in dump
SwordFaith Apr 14, 2025
77f0429
Try use mb cluster training
SwordFaith Apr 15, 2025
c0d710d
Fix script pip install bug
SwordFaith Apr 15, 2025
0e627d7
Disable debug data batch keys log
SwordFaith Apr 15, 2025
1a803ea
Refined to correctly run grpo in a stable way
SwordFaith Apr 15, 2025
b215f02
Fix ckpt path and wandb dir
SwordFaith Apr 15, 2025
9c11a1e
Add update torch-memory-saver and wandb
SwordFaith Apr 16, 2025
6062a8c
Add new scripts for verify
SwordFaith Apr 16, 2025
bc6043b
Fix config bugs
SwordFaith Apr 16, 2025
9abf201
Try fix import sgl when using vllm only error
SwordFaith Apr 16, 2025
28d6268
Try increase n=16
SwordFaith Apr 16, 2025
a556981
Add n=64 setting to increasing rate of convergence
SwordFaith Apr 16, 2025
6a479cc
Add temperature 1.0 support
SwordFaith Apr 16, 2025
a29fac3
Update config
SwordFaith Apr 23, 2025
df51df2
Dump debug code
SwordFaith Apr 23, 2025
9b50d00
Add verification for single turn generation
SwordFaith Apr 23, 2025
f20aed7
Fix bug
SwordFaith Apr 23, 2025
67b15e6
Add 3k max resp script
SwordFaith Apr 24, 2025
15ff9c5
Use patch with aligned cli args
SwordFaith Apr 24, 2025
5a372b0
Fix arg
SwordFaith Apr 24, 2025
439d816
Add new version data script
SwordFaith Apr 24, 2025
8ce6689
Try if it's padding issue
SwordFaith Apr 25, 2025
f60ac69
Add pad test script
SwordFaith Apr 25, 2025
679c324
Fix dtype in pad_sequence_to_length issue
SwordFaith Apr 25, 2025
07e25e6
Add new training script
SwordFaith Apr 25, 2025
ddb2337
Add pad print
SwordFaith Apr 25, 2025
25e7814
Add debug log
SwordFaith Apr 25, 2025
6f6f94e
Try refine debug logs
SwordFaith Apr 25, 2025
185cf74
Add n16 train script
SwordFaith Apr 25, 2025
c097cb1
Add loss mask test script
SwordFaith Apr 25, 2025
e839465
Add debug info to pad and unpad
SwordFaith Apr 26, 2025
560e2ca
Add schema update and format, need_tool_kwargs fix
SwordFaith Apr 26, 2025
b939760
try to change to relative path
WANG-GH Apr 26, 2025
0719d1f
fix abs path
WANG-GH Apr 26, 2025
9f17ace
Remove verl engine with async and add modelbest inc as co-org
SwordFaith Apr 26, 2025
6ecb8d4
fix conlficts in RLHF dataset
zhaochenyang20 Apr 27, 2025
80e3c2c
Merge branch 'main' of github.com:SwordFaith/verl
zhaochenyang20 Apr 27, 2025
0729b07
Merge branch 'main' into feat/renewed_multi_turn_pr_branch
zhaochenyang20 Apr 27, 2025
caf1a08
resolve not our code
WANG-GH Apr 27, 2025
a7ffe12
fix format v2
WANG-GH Apr 27, 2025
20f5696
Fix example run issue
SwordFaith Apr 27, 2025
6cfce61
Add environ SGL_DISABLE_TP_MEMORY_INBALANCE_CHECK=True
SwordFaith Apr 27, 2025
6e7017d
fix lint v1
WANG-GH Apr 27, 2025
4ea91e3
fix lint v2
WANG-GH Apr 27, 2025
80be127
Fix dapo ci
SwordFaith Apr 27, 2025
67f218f
Fix sanity check ci
SwordFaith Apr 27, 2025
092b7b9
Add back reinforce plus plus base line
SwordFaith Apr 27, 2025
2066bae
Fix file naming and dir structure issue
SwordFaith Apr 27, 2025
f2e8a6b
Fix license str
SwordFaith Apr 27, 2025
9efb2be
Fix req_list append missing
SwordFaith Apr 27, 2025
a195328
Sync generate sequences with sglang_rollout
SwordFaith Apr 27, 2025
897edf4
stop before enter fsdp_async_manager
WANG-GH Apr 28, 2025
10dff84
Add config validation assertion
SwordFaith Apr 28, 2025
e19fcaa
Refactor async sglang sharding manager
SwordFaith Apr 28, 2025
377bf06
repeat tool_kwargs & upgrade ppo_megatron_trainer
ocss884 Apr 28, 2025
b6fa35b
Fix e2e test too long
SwordFaith Apr 28, 2025
9805be8
refact test v1
WANG-GH Apr 28, 2025
3aebb8c
unfinish
WANG-GH Apr 28, 2025
712c2ef
refact test v3
WANG-GH Apr 28, 2025
a7236b8
refact test v4
WANG-GH Apr 28, 2025
5569eaf
Fix import path error and update ci config
SwordFaith Apr 28, 2025
48ad489
Merge branch 'main' into feat/add_async_sglang_multi_turn_support
tongyx361 Apr 28, 2025
9e66aec
fix: pre-commit
tongyx361 Apr 28, 2025
b078914
fix: license
tongyx361 Apr 28, 2025
508d382
fix: tools_kwargs
tongyx361 Apr 28, 2025
eb4f1cb
fix: .mypy_cache
tongyx361 Apr 28, 2025
5886337
Merge pull request #10 from tongyx361/tyx/fix/ci
tongyx361 Apr 28, 2025
f79d706
update async test
ocss884 Apr 28, 2025
a8eb256
pre-commit
ocss884 Apr 28, 2025
7020582
fix
ocss884 Apr 28, 2025
3468b4c
Fix lint and add mutiturn and sglang_async to e2e ppo trainer
SwordFaith Apr 28, 2025
70b7169
Fix broadcast_pyobj args
SwordFaith Apr 28, 2025
3501c46
fix sgl.yml
ocss884 Apr 28, 2025
828c05b
feat: sgl CI manual trigger
tongyx361 Apr 28, 2025
a8d6142
Fix nproc issue in sgl.yml
SwordFaith Apr 28, 2025
6b8552f
fix sgl.yml
ocss884 Apr 28, 2025
029d397
more
ocss884 Apr 28, 2025
db781c0
fix wandb
eric-haibin-lin Apr 28, 2025
2153056
Fix cpu backend missing bug
SwordFaith Apr 29, 2025
83a735d
Add sharding manager to unit test
SwordFaith Apr 29, 2025
270b7b0
Seperate e2e ppo trainer sglang task
SwordFaith Apr 29, 2025
040237e
Avoid validation and lower test time
SwordFaith Apr 29, 2025
3e21b08
feat: secrets.HF_ENDPOINT
tongyx361 Apr 29, 2025
1105c3e
fix: secrets.HF_ENDPOINT
tongyx361 Apr 29, 2025
766a082
feat: https://hf-mirror.com
tongyx361 Apr 29, 2025
9912290
temp: only test ppo_trainer for sglang
tongyx361 Apr 29, 2025
6ee96e1
Revert "temp: only test ppo_trainer for sglang"
tongyx361 Apr 29, 2025
ef7d527
temp: SGLang label
tongyx361 Apr 29, 2025
52a7ee6
Revert "temp: SGLang label"
tongyx361 Apr 29, 2025
1d4d648
temp: SGLang label
tongyx361 Apr 29, 2025
05413e3
feat: no_proxy for hf-mirror.com
tongyx361 Apr 29, 2025
6f0be26
fix: ppo_micro_batch_size_per_gpu=16
tongyx361 Apr 29, 2025
095ab9b
revert: SGLang label
tongyx361 Apr 29, 2025
58 changes: 58 additions & 0 deletions .github/workflows/sgl.yml
@@ -0,0 +1,58 @@
name: sgl

on:
  # Trigger the workflow on push or pull request,
  # but only for the main branch
  push:
    branches:
      - main
      - v0.2.x
    paths:
      - "**/*.py"
      - .github/workflows/sgl.yml
  pull_request:
    branches:
      - main
      - v0.2.x
    paths:
      - "**/*.py"
      - "verl/trainer/config/*.yaml"
      - .github/workflows/sgl.yml

# Cancel jobs on the same ref if a new one is triggered
concurrency:
  group: ${{ github.workflow }}-${{ github.ref }}
  cancel-in-progress: ${{ github.ref != 'refs/heads/main' }}

# Declare permissions just read content.
permissions:
  contents: read

jobs:
  sgl:
    runs-on: [self-hosted, l20-0]
    timeout-minutes: 20 # Increase this timeout value as needed
    env:
      HTTP_PROXY: ${{ secrets.PROXY_HTTP }}
      HTTPS_PROXY: ${{ secrets.PROXY_HTTPS }}
      NO_PROXY: "localhost,127.0.0.1"
      HF_HUB_ENABLE_HF_TRANSFER: 1
    container:
      image: ocss884/verl-sglang:ngc-th2.5.1-cu126-sglang0.4.4.post3
      options: --gpus all --shm-size=10g
    steps:
      - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
        with:
          fetch-depth: 0
      - name: Install the current repository
        run: |
          pip3 install hf_transfer
          pip3 install -e .[test,gpu,sglang] --no-deps
      - name: Test the latest SGLang
        run: |
          cd tests/rollout
          torchrun --standalone --nnodes=1 --nproc_per_node=4 $(which pytest) -s test_sglang_spmd.py
      - name: Test the latest SGLang async
        run: |
          cd tests/rollout
          torchrun --standalone --nnodes=1 --nproc_per_node=4 $(which pytest) -s test_async_sglang_spmd.py
40 changes: 40 additions & 0 deletions docs/sglang_multiturn/multiturn.rst
@@ -0,0 +1,40 @@
Multi-turn Rollout Support
==========================

Basic Configuration
~~~~~~~~~~~~~~~~~~~

To enable multi-turn rollout, make sure to configure the following fields in your rollout configuration:

.. code-block:: yaml

   actor_rollout_ref:
     rollout:
       name: sglang_async
       multi_turn:
         enable: True

This configuration activates the sglang_async engine for multi-turn interaction during rollout.

Custom Tool Configuration
~~~~~~~~~~~~~~~~~~~~~~~~~

For custom environment interaction tools, you can specify your tool configurations in a YAML file.
To do so, set the tool config path in your rollout config, matching the example script in this PR:

.. code-block:: yaml

   actor_rollout_ref:
     rollout:
       multi_turn:
         tool_config_path: <path_to_tool_yaml_file>

This allows integration of customized tool behaviors during actor rollout steps. You may refer to the GSM8KTool_example_configuration_ for guidance.

GSM8K Multi-turn Training Performance
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

See the training performance of multi-turn rollout on the GSM8K task HERE_.

.. _HERE: https://wandb.ai/zhaochenyang20/gsm8k_async_rl/runs/1ro1r7om?nw=nwuserzhaochenyang20

.. _GSM8KTool_example_configuration: ../../examples/sglang_multiturn/config/tool_config/gsm8k_tool_config.yaml
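
For orientation, here is a minimal sketch of what a custom tool behind `tool_config_path` might look like. This is an illustration under stated assumptions, not the PR's actual interface: the class shape and method names are inferred from the `tools_kwargs` keys used in this PR (`create_kwargs`, `execute_kwargs`, `calc_reward_kwargs`, `release_kwargs`) and from the commit that converts tool methods to async.

```python
# Hypothetical custom tool sketch; the method set mirrors the
# create/execute/calc_reward/release kwargs used by this PR's GSM8K example.
from typing import Any, Dict, Tuple


class MyCustomTool:
    def __init__(self, config: Dict[str, Any], tool_schema: Dict[str, Any]):
        self.config = config
        self.tool_schema = tool_schema  # OpenAI-style function schema from the YAML
        self._state: Dict[str, Dict[str, str]] = {}  # per-request state

    async def create(self, instance_id: str, ground_truth: str = "", **kwargs) -> str:
        # Seeded by create_kwargs carried in the dataset's tools_kwargs.
        self._state[instance_id] = {"ground_truth": ground_truth, "answer": ""}
        return instance_id

    async def execute(self, instance_id: str, parameters: Dict[str, Any], **kwargs) -> Tuple[str, float, Dict[str, Any]]:
        # Called when the model emits a tool_call; returns (observation, step reward, metrics).
        state = self._state[instance_id]
        state["answer"] = str(parameters.get("answer", ""))
        reward = await self.calc_reward(instance_id)
        return f"reward: {reward}", reward, {}

    async def calc_reward(self, instance_id: str, **kwargs) -> float:
        # Final reward for the trajectory: exact match against the ground truth.
        state = self._state[instance_id]
        return 1.0 if state["answer"] == state["ground_truth"] else 0.0

    async def release(self, instance_id: str, **kwargs) -> None:
        # Free per-request state once the rollout finishes.
        self._state.pop(instance_id, None)
```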
22 changes: 22 additions & 0 deletions examples/sglang_multiturn/config/gsm8k_multiturn_grpo.yaml
@@ -0,0 +1,22 @@
hydra:
  searchpath:
    - file://verl/trainer/config

defaults:
  - ppo_trainer
  - _self_

data:
[Review comment] Collaborator: please follow https://github.com/volcengine/verl/blob/main/recipe/prime/config/prime_trainer.yaml and only add critical config diff based on the default trainer config from verl

[Review comment] Collaborator: where do we have grpo yaml?

[Review comment] Collaborator: there isn't one, but you can inherit config from ppo_trainer and only include values different from the base config

  max_prompt_length: 1024
  max_response_length: 1024
  train_batch_size: 256
  return_raw_chat: True

actor_rollout_ref:
  hybrid_engine: True
  rollout:
    name: sglang_async
    multi_turn:
      enable: True
      max_turns: 5
      # tool_config_path: "./config/tool_config/gsm8k_tool_config.yaml"
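
As the review thread above notes, this file only carries the diff on top of verl's default `ppo_trainer` config; hydra composes the rest at launch time. For a quick sanity check of the merged result, something along these lines should work (a sketch, assuming you run from the repo root so the `file://verl/trainer/config` searchpath resolves):

```python
# Sketch: compose and inspect the merged config outside the trainer.
import os

from hydra import compose, initialize_config_dir
from omegaconf import OmegaConf

config_dir = os.path.abspath("examples/sglang_multiturn/config")
with initialize_config_dir(config_dir=config_dir, version_base=None):
    # Defaults pull in ppo_trainer first; _self_ then applies the overrides above.
    cfg = compose(config_name="gsm8k_multiturn_grpo")

print(OmegaConf.to_yaml(cfg.actor_rollout_ref.rollout))  # should show name: sglang_async
```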
15 changes: 15 additions & 0 deletions examples/sglang_multiturn/config/tool_config/gsm8k_tool_config.yaml
@@ -0,0 +1,15 @@
tools:
  - class_name: "verl.workers.tool.gsm8k_tool.Gsm8kTool"
    config: {}
    tool_schema:
      type: "function"
      function:
        name: "calc_gsm8k_reward"
        description: "A tool for calculating the reward of gsm8k. (1.0 if your answer is correct, 0.0 if your answer is incorrect)"
        parameters:
          type: "object"
          properties:
            answer:
              type: "string"
              description: "The model's answer to the GSM8K math problem; must be a digit string"
          required: ["answer"]
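
The `tool_schema` block follows the OpenAI function-calling format, so a conforming assistant turn would carry a `tool_calls` entry shaped like the following (values illustrative):

```python
# Illustrative assistant message whose tool_call conforms to the schema above.
tool_call_message = {
    "role": "assistant",
    "content": None,
    "tool_calls": [
        {
            "id": "call_0",
            "type": "function",
            "function": {
                "name": "calc_gsm8k_reward",
                # In the OpenAI format, arguments arrive as a JSON-encoded string.
                "arguments": '{"answer": "72"}',
            },
        }
    ],
}
```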
106 changes: 106 additions & 0 deletions examples/sglang_multiturn/gsm8k.py
[Review comment] Collaborator: Move it to data_preprocess. Rename gsm8k_tools.py
@@ -0,0 +1,106 @@
# Copyright 2024 Bytedance Ltd. and/or its affiliates
# Copyright 2023-2024 SGLang Team and ModelBest Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
Preprocess the GSM8k dataset to parquet format
"""

import argparse
import os
import re

import datasets

from verl.utils.hdfs_io import copy, makedirs


def extract_solution(solution_str):
    solution = re.search("#### (\\-?[0-9\\.\\,]+)", solution_str)
    assert solution is not None
    final_solution = solution.group(0)
    final_solution = final_solution.split("#### ")[1].replace(",", "")
    return final_solution


if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument("--local_dir", default="~/data/gsm8k")
    parser.add_argument("--hdfs_dir", default=None)

    args = parser.parse_args()

    data_source = "openai/gsm8k"
    dataset = datasets.load_dataset(data_source, "main")

    train_dataset = dataset["train"]
    test_dataset = dataset["test"]

    instruction_following = "You must use the `calc_gsm8k_reward` tool to calculate the reward of your answer (1.0 if your answer is correct, 0.0 if your answer is incorrect) at least once before submitting it, and refine your answer if necessary. Put your final answer in the format of `#### <answer>`."

    # add a row to each data item that represents a unique id
    def make_map_fn(split):
        def process_fn(example, idx):
            question_raw = example.pop("question")

            question = question_raw + " " + instruction_following

            answer_raw = example.pop("answer")
            solution = extract_solution(answer_raw)
            data = {
                "data_source": data_source,
                "prompt": [
                    {
                        "role": "system",
                        "content": "You are a math expert. You are given a question and you need to solve it step by step. `calc_gsm8k_reward` is a tool for calculating the reward of gsm8k. You should use this tool to calculate the reward of your answer (1.0 if your answer is correct, 0.0 if your answer is incorrect) before submitting it and refine your answer if necessary. Put your final answer in the format of `#### <answer>`.",
                    },
                    {
                        "role": "user",
                        "content": question,
                    },
                ],
                "ability": "math",
                "reward_model": {"style": "rule", "ground_truth": solution},
                "extra_info": {
                    "split": split,
                    "index": idx,
                    "answer": answer_raw,
                    "question": question_raw,
                    "need_tools_kwargs": True,
                    "tools_kwargs": {
                        "calc_gsm8k_reward": {
                            "create_kwargs": {"ground_truth": solution},
                            # "execute_kwargs": {},
                            # "calc_reward_kwargs": {},
                            # "release_kwargs": {},
                        },
                    },
                },
            }
            return data

        return process_fn

    train_dataset = train_dataset.map(function=make_map_fn("train"), with_indices=True)
    test_dataset = test_dataset.map(function=make_map_fn("test"), with_indices=True)

    local_dir = os.path.expanduser(args.local_dir)  # expand "~" so parquet writes to a real path
    hdfs_dir = args.hdfs_dir

    train_dataset.to_parquet(os.path.join(local_dir, "train.parquet"))
    test_dataset.to_parquet(os.path.join(local_dir, "test.parquet"))

    if hdfs_dir is not None:
        makedirs(hdfs_dir)

        copy(src=local_dir, dst=hdfs_dir)
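
To make the data layout concrete, here is what the preprocessing yields for one GSM8K-style record (the raw text is illustrative, not taken from the dataset):

```python
# Illustrative walk-through of extract_solution and the per-sample tool kwargs.
raw_answer = "Natalia sold 48 clips in April and 24 in May.\n#### 72"
solution = extract_solution(raw_answer)
assert solution == "72"

# Each sample carries tools_kwargs, so the rollout can seed the
# calc_gsm8k_reward tool with this ground truth at tool-creation time.
tools_kwargs = {
    "calc_gsm8k_reward": {
        "create_kwargs": {"ground_truth": solution},
    },
}
```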
40 changes: 40 additions & 0 deletions examples/sglang_multiturn/readme.md
@@ -0,0 +1,40 @@
# Multi-Turn Rollout Example (GSM8K)

This example demonstrates how to perform **multi-turn rollout** using SGLang with a tool-calling capable model (e.g., Qwen2.5-3B) on the GSM8K dataset.

## Usage

### Step 1: Download GSM8K Dataset

```bash
cd examples/sglang_multiturn
python3 gsm8k.py
```

This will download and preprocess the GSM8K dataset into `~/data/gsm8k/`.

### Step 2: Run Multi-Turn Rollout

If you have 8 GPUs, use the standard 8-GPU script:

```bash
cd your_verl_root_dir
bash examples/sglang_multiturn/run_qwen2.5-3b_gsm8k_multiturn.sh
```

If you have only 4 GPUs, use the fallback 4-GPU script:

```bash
cd your_verl_root_dir
bash examples/sglang_multiturn/run_qwen2.5-3b_gsm8k_multiturn_4xgpu.sh
```

## Notes

- The rollout supports multi-turn conversations with tool-calling capabilities.

- Current tools are used for GSM8K answer evaluation.

- Future versions may extend to search and code interpreter tools.
53 changes: 53 additions & 0 deletions examples/sglang_multiturn/run_qwen2.5-3b_gsm8k_multiturn.sh
@@ -0,0 +1,53 @@
# run on 8xH100
# make sure your current working directory is the root of the project

set -x

ulimit -n 65535

PROJECT_DIR="$(pwd)"
CONFIG_PATH="$PROJECT_DIR/examples/sglang_multiturn/config"

python3 -m verl.trainer.main_ppo \
    --config-path="$CONFIG_PATH" \
    --config-name='gsm8k_multiturn_grpo' \
    algorithm.adv_estimator=grpo \
    data.train_batch_size=256 \
    data.max_prompt_length=1024 \
    data.max_response_length=1024 \
    data.filter_overlong_prompts=True \
    data.truncation='error' \
    data.return_raw_chat=True \
    actor_rollout_ref.model.path=Qwen/Qwen2.5-3B-Instruct \
    actor_rollout_ref.actor.optim.lr=1e-6 \
    actor_rollout_ref.model.use_remove_padding=True \
    actor_rollout_ref.actor.ppo_mini_batch_size=256 \
    actor_rollout_ref.actor.ppo_micro_batch_size_per_gpu=40 \
    actor_rollout_ref.actor.use_kl_loss=True \
    actor_rollout_ref.actor.kl_loss_coef=0.001 \
    actor_rollout_ref.actor.kl_loss_type=low_var_kl \
    actor_rollout_ref.actor.entropy_coeff=0 \
    actor_rollout_ref.model.enable_gradient_checkpointing=True \
    actor_rollout_ref.actor.fsdp_config.param_offload=False \
    actor_rollout_ref.actor.fsdp_config.optimizer_offload=False \
    actor_rollout_ref.rollout.log_prob_micro_batch_size_per_gpu=40 \
    actor_rollout_ref.rollout.tensor_model_parallel_size=2 \
    actor_rollout_ref.rollout.name=sglang_async \
    actor_rollout_ref.rollout.gpu_memory_utilization=0.5 \
    actor_rollout_ref.rollout.n=16 \
    actor_rollout_ref.ref.log_prob_micro_batch_size_per_gpu=40 \
    actor_rollout_ref.ref.fsdp_config.param_offload=True \
    algorithm.use_kl_in_reward=False \
    trainer.critic_warmup=0 \
    trainer.logger=['console','wandb'] \
    trainer.project_name='gsm8k_async_rl' \
    trainer.experiment_name='qwen2.5-3b_function_rm-gsm8k-async-sgl-multi-w-tool-verify-n16' \
    trainer.n_gpus_per_node=8 \
    trainer.nnodes=1 \
    trainer.save_freq=-1 \
    trainer.test_freq=20 \
    data.train_files=$HOME/data/gsm8k/train.parquet \
    data.val_files=$HOME/data/gsm8k/test.parquet \
    actor_rollout_ref.rollout.multi_turn.tool_config_path="$PROJECT_DIR/examples/sglang_multiturn/config/tool_config/gsm8k_tool_config.yaml" \
    trainer.total_epochs=15 "$@"
