Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
48 commits
Select commit Hold shift + click to select a range
0e10c96
change ENGINE to sglang_async
May 15, 2025
edd3f70
Add megatron support to sglang_async
SwordFaith May 17, 2025
c06a169
Fix rebase error and share device mesh between rolllout and sharding …
SwordFaith May 17, 2025
393eb98
Fix formatting issue
SwordFaith May 17, 2025
b269356
bump 0.4.6.post4
ocss884 May 17, 2025
d79548d
Fix megatron support
SwordFaith May 18, 2025
b7bae38
Add tmp sandbox fusion tool
SwordFaith May 19, 2025
c6aa6e8
A tmp version for sandbox fusion test, need to improve
SwordFaith May 20, 2025
3c20523
Unified processing of batch-level and request-level generate sequence
zyzshishui May 21, 2025
a8a9dad
remove async in naming
zyzshishui May 21, 2025
5eb8913
fix review
zyzshishui May 21, 2025
041232f
add multi-turn fsdp2 ci
zyzshishui May 22, 2025
f6090e4
remove redundant fsdp2 test script
zyzshishui May 22, 2025
8ad476f
add tp size comment for megatron
zhaochenyang20 May 24, 2025
3306786
clean up comments for sglang rollout
zhaochenyang20 May 24, 2025
6e58735
Merge pull request #1 from zyzshishui/refactor_refactor
zyzshishui May 24, 2025
26f5415
delete comment and remove deprecated VerlEngine
zyzshishui May 24, 2025
9bedb2c
Merge branch 'refactor' of github.com:zyzshishui/verl into refactor
zhaochenyang20 May 24, 2025
0c325e9
refactor with block for sampling parameter update
zhaochenyang20 May 24, 2025
dbb568d
Fix review
zyzshishui May 25, 2025
7be85f7
Merge pull request #2 from zyzshishui/refactor_with
zyzshishui May 25, 2025
93ca6ea
adapt new FunctionCallParser path to sgl post5
zyzshishui May 27, 2025
c649aeb
merge main
zyzshishui May 27, 2025
eec7724
merge main
zyzshishui May 27, 2025
c4c58a3
remove files related to 1525
zyzshishui May 27, 2025
467fdd4
solve mistakes in merge, change deprecated warning location, and chan…
zyzshishui May 27, 2025
fa1fa6a
mv
ocss884 May 27, 2025
57b3dd2
mv
ocss884 May 27, 2025
2389f9f
rm useless file & clean-up rollout init
ocss884 May 28, 2025
084a4ad
Merge branch 'main' into remove-async
ocss884 May 28, 2025
e84023e
Merge branch 'volcengine:main' into remove-async
zyzshishui May 28, 2025
0aabbdd
add comment
zyzshishui May 28, 2025
43cce7f
bump to 0.4.6.post5
ocss884 May 29, 2025
d31b923
update license
ocss884 May 29, 2025
173ddf1
Merge remote-tracking branch 'upstream/main' into remove-async
zyzshishui May 29, 2025
390fdc9
Merge remote-tracking branch 'upstream/main' into remove-async
zyzshishui May 29, 2025
cc38005
Merge remote-tracking branch 'upstream/main' into remove-async
zyzshishui May 29, 2025
8e387ba
make sure required sglang version is installed
eric-haibin-lin May 29, 2025
010b790
Merge branch 'volcengine:main' into remove-async
zyzshishui May 29, 2025
0444f4a
install deps in sppo test
eric-haibin-lin May 29, 2025
62aa913
add sglang license
zyzshishui May 29, 2025
68496bf
remove deprecated AsyncSGLangRollout
zyzshishui May 30, 2025
94da29e
compatible with old versions of sglang
zyzshishui May 30, 2025
a5d875a
change sgl docker
zyzshishui May 30, 2025
392279f
add no-deps option
zyzshishui May 30, 2025
11bf1cc
use whatcanyousee/verl:ngc-cu124-vllm0.8.5-sglang0.4.6.post5-mcore0.1…
eric-haibin-lin May 30, 2025
5b87be2
Merge branch 'main' into remove-async
zyzshishui May 31, 2025
994c167
remove AsyncSGLangRollout in Search-R1 test
zyzshishui May 31, 2025
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
41 changes: 6 additions & 35 deletions .github/workflows/e2e_ppo_trainer.yml
Original file line number Diff line number Diff line change
Expand Up @@ -204,7 +204,7 @@ jobs:
HF_ENDPOINT: "https://hf-mirror.com"
HF_HUB_ENABLE_HF_TRANSFER: "0" # This is more stable
container:
image: ocss884/verl-sglang:ngc-th2.6.0-cu126-sglang0.4.6.post4
image: whatcanyousee/verl:ngc-cu124-vllm0.8.5-sglang0.4.6.post5-mcore0.12.0-te2.3
options: --gpus all --shm-size=10g
steps:
- uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
Expand All @@ -222,7 +222,7 @@ jobs:
ray stop --force
ENGINE=sglang bash tests/e2e/ppo_trainer/run_function_reward.sh

e2e_ppo_trainer_sglang_async:
e2e_ppo_trainer_sglang_multiturn_with_tool:
runs-on: [L20x8]
needs: pre_commit_for_ppo
timeout-minutes: 40 # Increase this timeout value as needed
Expand All @@ -233,36 +233,7 @@ jobs:
HF_ENDPOINT: "https://hf-mirror.com"
HF_HUB_ENABLE_HF_TRANSFER: "0" # This is more stable
container:
image: ocss884/verl-sglang:ngc-th2.6.0-cu126-sglang0.4.6.post4
options: --gpus all --shm-size=10g
steps:
- uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
with:
fetch-depth: 0
- name: Install the current repository
run: |
pip3 install -e .[test,gpu,sglang] --no-deps
- name: Prepare gsm8k dataset
run: |
ray stop --force
python3 examples/data_preprocess/gsm8k.py
- name: Running GSM8K E2E training tests on 8 L20 GPUs with rmpad using function rm and save ckpt with sglang async
run: |
ray stop --force
ENGINE=sglang_async bash tests/e2e/ppo_trainer/run_function_reward.sh

e2e_ppo_trainer_sglang_async_with_tool:
runs-on: [L20x8]
needs: pre_commit_for_ppo
timeout-minutes: 40 # Increase this timeout value as needed
env:
HTTP_PROXY: ${{ secrets.PROXY_HTTP }}
HTTPS_PROXY: ${{ secrets.PROXY_HTTPS }}
NO_PROXY: "localhost,127.0.0.1,hf-mirror.com"
HF_ENDPOINT: "https://hf-mirror.com"
HF_HUB_ENABLE_HF_TRANSFER: "0" # This is more stable
container:
image: ocss884/verl-sglang:ngc-th2.6.0-cu126-sglang0.4.6.post4
image: whatcanyousee/verl:ngc-cu124-vllm0.8.5-sglang0.4.6.post5-mcore0.12.0-te2.3
options: --gpus all --shm-size=10g
steps:
- uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
Expand All @@ -275,7 +246,7 @@ jobs:
run: |
ray stop --force
python3 examples/data_preprocess/gsm8k_multiturn_w_tool.py --local_dir $HOME/data/gsm8k_verl_sgl_multi_turn_preprocessed
- name: Running GSM8K with tool E2E training tests on 8 L20 GPUs with rmpad using function rm and save ckpt with sglang async
- name: Running GSM8K with tool E2E training tests on 8 L20 GPUs with rmpad using function rm and save ckpt with sglang
run: |
ray stop --force
bash tests/e2e/run_gsm8k_fsdp_sgl_multiturn_w_tool.sh
Expand All @@ -295,7 +266,7 @@ jobs:
HF_ENDPOINT: "https://hf-mirror.com"
HF_HUB_ENABLE_HF_TRANSFER: "0" # This is more stable
container:
image: ocss884/verl-sglang:ngc-th2.6.0-cu126-sglang0.4.6.post4
image: whatcanyousee/verl:ngc-cu124-vllm0.8.5-sglang0.4.6.post5-mcore0.12.0-te2.3
options: --gpus all --shm-size=50g # Visual dataloader requires large memory
steps:
- uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
Expand Down Expand Up @@ -367,7 +338,7 @@ jobs:
HF_ENDPOINT: "https://hf-mirror.com"
HF_HUB_ENABLE_HF_TRANSFER: "0" # This is more stable
container:
image: ocss884/verl-sglang:ngc-th2.6.0-cu126-sglang0.4.6.post4
image: whatcanyousee/verl:ngc-cu124-vllm0.8.5-sglang0.4.6.post5-mcore0.12.0-te2.3
options: --gpus all --shm-size=50g # Visual dataloader requires large memory
steps:
- uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
Expand Down
10 changes: 5 additions & 5 deletions .github/workflows/e2e_ppo_trainer_megatron.yml
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,7 @@ jobs:
HF_ENDPOINT: "https://hf-mirror.com"
HF_HUB_ENABLE_HF_TRANSFER: "0" # This is more stable
container:
image: whatcanyousee/verl:ngc-cu124-vllm0.8.5-sglang0.4.6-mcore0.12.0-te2.3
image: whatcanyousee/verl:ngc-cu124-vllm0.8.5-sglang0.4.6.post5-mcore0.12.0-te2.3
options: --gpus all --shm-size=10g
steps:
- uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
Expand Down Expand Up @@ -92,7 +92,7 @@ jobs:
HF_ENDPOINT: "https://hf-mirror.com"
HF_HUB_ENABLE_HF_TRANSFER: "0" # This is more stable
container:
image: whatcanyousee/verl:ngc-cu124-vllm0.8.5-sglang0.4.6-mcore0.12.0-te2.3
image: whatcanyousee/verl:ngc-cu124-vllm0.8.5-sglang0.4.6.post5-mcore0.12.0-te2.3
options: --gpus all --shm-size=10g
steps:
- uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
Expand Down Expand Up @@ -134,7 +134,7 @@ jobs:
HF_ENDPOINT: "https://hf-mirror.com"
HF_HUB_ENABLE_HF_TRANSFER: "0" # This is more stable
container:
image: whatcanyousee/verl:ngc-cu124-vllm0.8.5-sglang0.4.6-mcore0.12.0-te2.3
image: whatcanyousee/verl:ngc-cu124-vllm0.8.5-sglang0.4.6.post5-mcore0.12.0-te2.3
options: --gpus all --shm-size=10g
steps:
- uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
Expand Down Expand Up @@ -167,7 +167,7 @@ jobs:
HF_ENDPOINT: "https://hf-mirror.com"
HF_HUB_ENABLE_HF_TRANSFER: "0" # This is more stable
container:
image: whatcanyousee/verl:ngc-cu124-vllm0.8.5-sglang0.4.6-mcore0.12.0-te2.3
image: whatcanyousee/verl:ngc-cu124-vllm0.8.5-sglang0.4.6.post5-mcore0.12.0-te2.3
options: --gpus all --shm-size=10g
steps:
- uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
Expand Down Expand Up @@ -206,7 +206,7 @@ jobs:
HF_ENDPOINT: "https://hf-mirror.com"
HF_HUB_ENABLE_HF_TRANSFER: "0" # This is more stable
container:
image: whatcanyousee/verl:ngc-cu124-vllm0.8.5-sglang0.4.6-mcore0.12.0-te2.3
image: whatcanyousee/verl:ngc-cu124-vllm0.8.5-sglang0.4.6.post5-mcore0.12.0-te2.3
options: --gpus all --shm-size=10g
steps:
- uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
Expand Down
8 changes: 2 additions & 6 deletions .github/workflows/sgl.yml
Original file line number Diff line number Diff line change
Expand Up @@ -56,7 +56,7 @@ jobs:
HF_HUB_ENABLE_HF_TRANSFER: 1
SGL_DISABLE_TP_MEMORY_INBALANCE_CHECK: "True"
container:
image: ocss884/verl-sglang:ngc-th2.6.0-cu126-sglang0.4.6.post4
image: whatcanyousee/verl:ngc-cu124-vllm0.8.5-sglang0.4.6.post5-mcore0.12.0-te2.3
options: --gpus all --shm-size=10g
steps:
- uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
Expand All @@ -73,11 +73,7 @@ jobs:
- name: Test the latest SGLang
run: |
cd tests/workers/rollout
torchrun --nnodes=1 --nproc_per_node=4 $(which pytest) -s test_sglang_spmd.py
- name: Test the latest SGLang async
run: |
cd tests/workers/rollout
torchrun --nnodes=1 --nproc_per_node=2 $(which pytest) -s test_sglang_async_spmd.py
torchrun --nnodes=1 --nproc_per_node=2 $(which pytest) -s test_sglang_spmd.py
- name: Test the latest SGLang Rollout async with tool
run: |
cd tests/workers/rollout
Expand Down
2 changes: 1 addition & 1 deletion docker/Dockerfile.rocm
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
# Support - Training: fsdp; Inference: vllm
# FROM rocm/vllm:rocm6.2_mi300_ubuntu20.04_py3.9_vllm_0.6.4
# Support - Training: fsdp; Inference: vllm, sglang
FROM lmsysorg/sglang:v0.4.6.post4-rocm630
FROM lmsysorg/sglang:v0.4.6.post5-rocm630

# Set working directory
# WORKDIR $PWD/app
Expand Down
4 changes: 2 additions & 2 deletions docker/Dockerfile.sglang
Original file line number Diff line number Diff line change
Expand Up @@ -36,8 +36,8 @@ RUN pip config set global.index-url "${PIP_INDEX}" && \
pip config set global.extra-index-url "${PIP_INDEX}" && \
python -m pip install --upgrade pip

# Install sglang-0.4.6.post4 and torch-memory-saver
RUN pip install "sglang[all]==0.4.6.post4" --no-cache-dir --find-links https://flashinfer.ai/whl/cu124/torch2.6/flashinfer-python && pip install torch-memory-saver --no-cache-dir
# Install sglang-0.4.6.post5 and torch-memory-saver
RUN pip uninstall -y cuda-python && pip install "sglang[all]==0.4.6.post5" --no-cache-dir --find-links https://flashinfer.ai/whl/cu124/torch2.6/flashinfer-python && pip install torch-memory-saver --no-cache-dir

# Install torch-2.6.0
RUN pip install --no-cache-dir torch==2.6.0 torchvision==0.21.0 torchaudio==2.6.0 tensordict torchdata \
Expand Down
4 changes: 2 additions & 2 deletions docker/Dockerfile.vllm.sglang.megatron
Original file line number Diff line number Diff line change
Expand Up @@ -56,12 +56,12 @@ RUN aria2c --always-resume=true --max-tries=99999 https://developer.download.nvi
update-alternatives --set cuda /usr/local/cuda-12.4 && \
rm -rf /usr/local/cuda-12.6

# Install torch-2.6.0+cu124 + vllm-0.8.5.post1 + sglang-0.4.6.post4
# Install torch-2.6.0+cu124 + vllm-0.8.5.post1 + sglang-0.4.6.post5
# torch-2.6.0+cu124: cxx11abi=False
# torch-2.6.0+cu126: cxx11abi=True
# see https://github.com/flashinfer-ai/flashinfer/issues/911
# Install sglang-0.4.6.post5 and torch-memory-saver
RUN pip install "sglang[all]==0.4.6.post1" --no-cache-dir --find-links https://flashinfer.ai/whl/cu124/torch2.6/flashinfer-python && pip install torch-memory-saver --no-cache-dir
RUN pip install "sglang[all]==0.4.6.post5" --no-cache-dir --find-links https://flashinfer.ai/whl/cu124/torch2.6/flashinfer-python && pip install torch-memory-saver --no-cache-dir

RUN pip install --no-cache-dir "vllm==0.8.5.post1" "torch==2.6.0" "torchvision==0.21.0" "torchaudio==2.6.0" "tensordict==0.6.2" torchdata

Expand Down
2 changes: 1 addition & 1 deletion docs/amd_tutorial/amd_build_dockerfile_page.rst
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ docker/Dockerfile.rocm
# Support - Training: fsdp; Inference: vllm
# FROM rocm/vllm:rocm6.2_mi300_ubuntu20.04_py3.9_vllm_0.6.4
# Support - Training: fsdp; Inference: vllm, sglang
FROM lmsysorg/sglang:v0.4.6.post4-rocm630
FROM lmsysorg/sglang:v0.4.6.post5-rocm630

# Set working directory
# WORKDIR $PWD/app
Expand Down
4 changes: 2 additions & 2 deletions docs/sglang_multiturn/multiturn.rst
Original file line number Diff line number Diff line change
Expand Up @@ -11,9 +11,9 @@ To enable multi-turn rollout, make sure to configure the following fields in you
actor_rollout_ref:
rollout:
multi_turn: True
name: "sglang_async"
name: "sglang"

These configuration activates the sglang_async engine for multi-turn interaction during rollout.
This configuration activates the sglang engine for multi-turn interaction during rollout.

Custom Tool Configuration
~~~~~~~~~~~~~~~~~~~~~~~~~
Expand Down
4 changes: 2 additions & 2 deletions docs/start/install.rst
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@ For vLLM with Megatron or FSDP, please use the stable version of image ``whatcan

For latest vLLM with FSDP, please refer to ``hiyouga/verl:ngc-th2.6.0-cu126-vllm0.8.4-flashinfer0.2.2-cxx11abi0``.

For SGLang with FSDP, please use ``ocss884/verl-sglang:ngc-th2.6.0-cu126-sglang0.4.6.post4`` which is provided by SGLang RL Group.
For SGLang with FSDP, please use ``ocss884/verl-sglang:ngc-th2.6.0-cu126-sglang0.4.6.post5`` which is provided by SGLang RL Group.

See files under ``docker/`` for NGC-based image or if you want to build your own.

Expand Down Expand Up @@ -79,7 +79,7 @@ See files under ``docker/`` for NGC-based image or if you want to build your own
- **Flash Attention**: 2.7.4.post1
- **Flash Infer**: 0.2.2.post1
- **vLLM**: 0.8.5
- **SGLang**: 0.4.6.post4
- **SGLang**: 0.4.6.post5
- **Megatron-LM**: core_v0.12.0
- **TransformerEngine**: 2.3
- **Ray**: 2.44.1
Expand Down
10 changes: 5 additions & 5 deletions docs/workers/sglang_worker.rst
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ Please always follow the following command to install SGLang with verl.
.. code-block:: bash

pip install --upgrade pip
# Currently 0.4.6.post4, subject to updates at any time, please refer to the latest version specified in `setup.py`
# Currently 0.4.6.post5, subject to updates at any time, please refer to the latest version specified in `setup.py`
pip install -e ".[sglang]"

You can check the following dependencies are in your environment:
Expand All @@ -31,8 +31,8 @@ You can check the following dependencies are in your environment:
- **PyTorch**: 2.6.0+cu124
- **CUDA**: 12.4
- **flashinfer-python**: 0.2.5+cu124torch2.6
- **sgLang**: 0.4.6.post4
- **sgl-kernel**: 0.1.2.post1
- **SGLang**: 0.4.6.post5
- **sgl-kernel**: 0.1.4

Using SGLang as the Inference Backend for PPO Training on a Single Machine
-------------------------------------------------------------------------
Expand Down Expand Up @@ -87,7 +87,7 @@ Why export SGL_DISABLE_TP_MEMORY_INBALANCE_CHECK?

1. ``verl`` initializes a ``SGLangRollout`` module during rollout, which is used to evaluate/generate samples.

2. ``SGLangRollout`` will initialize ``VerlEngine``, and further initialize a ``torch.distributed.DeviceMesh``, used to support Tensor Parallel (TP).
2. ``SGLangRollout`` will initialize ``Engine``, and further initialize a ``torch.distributed.DeviceMesh``, used to support Tensor Parallel (TP).

3. ``DeviceMesh.init()`` internally checks the free GPU memory of all participating devices. If the difference is too large (more than ~10%), it directly reports an error to avoid initialization failures or deadlocks.

Expand All @@ -111,7 +111,7 @@ Early workers already use up GPU memory → late workers still have empty memory

**3. SGLang's TP init uses "all-device broadcast", but there's no uniform release timing**

Although ``SGLangRollout`` may only involve subset of GPUs, its ``VerlEngine`` initialization calls ``torch.distributed.init_process_group()`` and broadcasts weights, so:
Although ``SGLangRollout`` may only involve a subset of GPUs, its ``Engine`` initialization calls ``torch.distributed.init_process_group()`` and broadcasts weights, so:

- Non-rollout GPUs also join the communication.
- Later on, ``DeviceMesh`` init will fail due to "inconsistent memory".
Expand Down
2 changes: 1 addition & 1 deletion examples/sglang_multiturn/config/gsm8k_multiturn_grpo.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ data:
actor_rollout_ref:
hybrid_engine: True
rollout:
name: sglang_async
name: sglang
multi_turn:
enable: True
max_turns: 5
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ data:
actor_rollout_ref:
hybrid_engine: True
rollout:
name: sglang_async
name: sglang
multi_turn:
enable: True
max_turns: 5
Expand Down
4 changes: 2 additions & 2 deletions examples/sglang_multiturn/run_qwen2.5-3b_gsm8k_multiturn.sh
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ python3 -m verl.trainer.main_ppo \
actor_rollout_ref.actor.fsdp_config.optimizer_offload=False \
actor_rollout_ref.rollout.log_prob_micro_batch_size_per_gpu=32 \
actor_rollout_ref.rollout.tensor_model_parallel_size=2 \
actor_rollout_ref.rollout.name=sglang_async \
actor_rollout_ref.rollout.name=sglang \
actor_rollout_ref.rollout.gpu_memory_utilization=0.5 \
actor_rollout_ref.rollout.n=16 \
actor_rollout_ref.ref.log_prob_micro_batch_size_per_gpu=32 \
Expand All @@ -41,7 +41,7 @@ python3 -m verl.trainer.main_ppo \
trainer.critic_warmup=0 \
trainer.logger=['console','wandb'] \
trainer.project_name='gsm8k_async_rl' \
trainer.experiment_name='qwen2.5-3b_function_rm-gsm8k-async-sgl-multi-w-tool-verify-n16' \
trainer.experiment_name='qwen2.5-3b_function_rm-gsm8k-sgl-multi-w-tool-verify-n16' \
trainer.n_gpus_per_node=8 \
trainer.nnodes=1 \
trainer.save_freq=-1 \
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ python3 -m verl.trainer.main_ppo \
actor_rollout_ref.actor.fsdp_config.optimizer_offload=False \
actor_rollout_ref.rollout.log_prob_micro_batch_size_per_gpu=32 \
actor_rollout_ref.rollout.tensor_model_parallel_size=2 \
actor_rollout_ref.rollout.name=sglang_async \
actor_rollout_ref.rollout.name=sglang \
actor_rollout_ref.rollout.gpu_memory_utilization=0.5 \
actor_rollout_ref.rollout.n=16 \
actor_rollout_ref.ref.log_prob_micro_batch_size_per_gpu=32 \
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -45,15 +45,15 @@ python3 -m verl.trainer.main_ppo \
actor_rollout_ref.ref.megatron.tensor_model_parallel_size=2 \
actor_rollout_ref.rollout.log_prob_micro_batch_size_per_gpu=16 \
actor_rollout_ref.rollout.tensor_model_parallel_size=2 \
actor_rollout_ref.rollout.name=sglang_async \
actor_rollout_ref.rollout.name=sglang \
actor_rollout_ref.rollout.gpu_memory_utilization=0.5 \
actor_rollout_ref.rollout.n=8 \
actor_rollout_ref.ref.log_prob_micro_batch_size_per_gpu=16 \
algorithm.use_kl_in_reward=False \
trainer.critic_warmup=0 \
trainer.logger=['console','wandb'] \
trainer.project_name='gsm8k_async_rl' \
trainer.experiment_name='qwen2.5-3b_function_rm-gsm8k-async-sgl-multi-w-tool-n8-mcore-v2505201745_seed42' \
trainer.experiment_name='qwen2.5-3b_function_rm-gsm8k-sgl-multi-w-tool-n8-mcore-v2505201745_seed42' \
trainer.n_gpus_per_node=8 \
trainer.nnodes=1 \
trainer.save_freq=-1 \
Expand Down
4 changes: 2 additions & 2 deletions requirements_sglang.txt
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,6 @@ torchdata
torchvision
transformers
wandb
sglang[all]==0.4.6.post4
sglang[all]==0.4.6.post5
torch-memory-saver>=0.0.5
huggingface_hub
huggingface_hub
7 changes: 6 additions & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,12 @@
GPU_REQUIRES = ["liger-kernel", "flash-attn"]
MATH_REQUIRES = ["math-verify"] # Add math-verify as an optional dependency
VLLM_REQUIRES = ["tensordict<=0.6.2", "vllm<=0.8.5"]
SGLANG_REQUIRES = ["tensordict<=0.6.2", "sglang[srt,openai]==0.4.6.post4", "torch-memory-saver>=0.0.5", "torch==2.6.0"]
SGLANG_REQUIRES = [
"tensordict<=0.6.2",
"sglang[srt,openai]==0.4.6.post5",
"torch-memory-saver>=0.0.5",
"torch==2.6.0",
]

extras_require = {
"test": TEST_REQUIRES,
Expand Down
4 changes: 2 additions & 2 deletions tests/e2e/run_gsm8k_fsdp_sgl_multiturn_w_tool.sh
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@ python3 -m verl.trainer.main_ppo \
actor_rollout_ref.actor.fsdp_config.optimizer_offload=False \
actor_rollout_ref.rollout.log_prob_micro_batch_size_per_gpu=32 \
actor_rollout_ref.rollout.tensor_model_parallel_size=2 \
actor_rollout_ref.rollout.name=sglang_async \
actor_rollout_ref.rollout.name=sglang \
actor_rollout_ref.rollout.gpu_memory_utilization=0.5 \
actor_rollout_ref.rollout.n=8 \
actor_rollout_ref.ref.log_prob_micro_batch_size_per_gpu=32 \
Expand All @@ -46,7 +46,7 @@ python3 -m verl.trainer.main_ppo \
trainer.critic_warmup=0 \
trainer.logger=['console'] \
trainer.project_name='gsm8k_async_rl' \
trainer.experiment_name=qwen2.5-3b_function_rm-gsm8k-async-sgl-multi-w-tool-$FSDP_STRATEGY-rebased-0427-verify-n16 \
trainer.experiment_name=qwen2.5-3b_function_rm-gsm8k-sgl-multi-w-tool-$FSDP_STRATEGY-rebased-0427-verify-n16 \
trainer.n_gpus_per_node=8 \
trainer.nnodes=1 \
trainer.save_freq=-1 \
Expand Down
2 changes: 1 addition & 1 deletion tests/e2e/run_ppo_trainer_megatron.sh
Original file line number Diff line number Diff line change
Expand Up @@ -78,7 +78,7 @@ if [ $SKIP_SAVE_HF_MODEL -eq 1 ]; then
CHECKPOINT_CONTENTS=['model','optimizer','extra']
fi

ENGINES=("vllm" "sglang_async")
ENGINES=("vllm" "sglang")

exp_name="$(basename "${MODEL_ID,,}")-megatron-gsm8k-minimal"

Expand Down
2 changes: 1 addition & 1 deletion tests/workers/rollout/test_async_sglang_server.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
# Copyright 2023-2024 SGLang Team
# Copyright 2025 Bytedance Ltd. and/or its affiliates
#
# Licensed under the Apache License, Version 2.0 (the "License");
Expand All @@ -20,7 +21,6 @@
@patch.dict(
"sys.modules",
{
"verl.workers.rollout.sglang_rollout.async_sglang_rollout": MagicMock(AsyncSGLangRollout=MagicMock()),
"verl.workers.rollout.sglang_rollout.sglang_rollout": MagicMock(SGLangRollout=MagicMock()),
},
)
Expand Down
Loading
Loading