Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 4 additions & 4 deletions .github/workflows/dataset.yml
Original file line number Diff line number Diff line change
Expand Up @@ -49,10 +49,10 @@ jobs:
run: |
[ ! -d "$HOME/verl-data" ] && git clone --depth 1 https://github.com/eric-haibin-lin/verl-data ~/verl-data
python3 examples/data_preprocess/geo3k.py
pytest -s -x tests/verl/utils/dataset/test_rl_dataset.py
pytest -s -x tests/verl/utils/dataset/test_sft_dataset.py
pytest -s -x tests/verl/utils/test_import_utils.py
# pytest -s -x tests/verl/utils/dataset/test_rm_dataset.py
pytest -s -x tests/utils/dataset/test_rl_dataset.py
pytest -s -x tests/utils/dataset/test_sft_dataset.py
pytest -s -x tests/utils/test_import_utils.py
# pytest -s -x tests/utils/dataset/test_rm_dataset.py
- name: Running ray test using cupy (move it to L20 when dockerfile ready)
run: |
cd tests/ray_gpu
Expand Down
24 changes: 12 additions & 12 deletions .github/workflows/model.yml
Original file line number Diff line number Diff line change
Expand Up @@ -15,8 +15,8 @@ on:
- "verl/**/*.py"
# Entrypoints
- ".github/workflows/model.yml"
- "tests/checkpoint/test_fsdp_ckpt.py"
- "tests/model/test_transformers_ulysses.py"
- "tests/utils/checkpoint/test_fsdp_ckpt.py"
- "tests/models/test_transformers_ulysses.py"
- "tests/distributed/run_all.sh"

# Declare permissions just read content.
Expand Down Expand Up @@ -46,37 +46,37 @@ jobs:
pip3 install --upgrade transformers
- name: Running rmpad model tests on 8 L20 GPUs + flash_attn 2.5.8
run: |
pytest -s tests/model/test_transformer.py
pytest -s tests/models/test_transformer.py
- name: Running rmpad model tests on 8 L20 GPUs + latest flash_attn
run: |
pip3 install --upgrade flash_attn --no-build-isolation
pytest -s tests/model/test_transformer.py
pytest -s tests/models/test_transformer.py
- name: Running FSDP rmpad model tests on 8 L20 GPUs + latest flash_attn
run: |
STRATEGY=fsdp torchrun --nproc_per_node=8 tests/checkpoint/test_fsdp_ckpt.py
STRATEGY=fsdp torchrun --nproc_per_node=8 tests/utils/checkpoint/test_fsdp_ckpt.py
- name: Running transformers ulysses tests on 8 L20 GPUs + latest transformers
run: |
torchrun --nproc_per_node=8 -m pytest tests/model/test_transformers_ulysses.py
torchrun --nproc_per_node=8 -m pytest tests/models/test_transformers_ulysses.py
- name: Running transformers ulysses tests on 8 L20 GPUs + transformers 4.49.0
run: |
pip3 install transformers==4.49.0
torchrun --nproc_per_node=8 -m pytest tests/model/test_transformers_ulysses.py
torchrun --nproc_per_node=8 -m pytest tests/models/test_transformers_ulysses.py
- name: Running transformers ulysses tests on 8 L20 GPUs + transformers 4.48.0
run: |
pip3 install transformers==4.48.0
torchrun --nproc_per_node=8 -m pytest tests/model/test_transformers_ulysses.py
torchrun --nproc_per_node=8 -m pytest tests/models/test_transformers_ulysses.py
- name: Running transformers ulysses tests on 8 L20 GPUs + transformers 4.47.0
run: |
pip3 install transformers==4.47.0
torchrun --nproc_per_node=8 -m pytest tests/model/test_transformers_ulysses.py
torchrun --nproc_per_node=8 -m pytest tests/models/test_transformers_ulysses.py
- name: Running transformers ulysses tests on 8 L20 GPUs + transformers 4.46.0
run: |
pip3 install transformers==4.46.0
torchrun --nproc_per_node=8 -m pytest tests/model/test_transformers_ulysses.py
torchrun --nproc_per_node=8 -m pytest tests/models/test_transformers_ulysses.py
- name: Running transformers ulysses tests on 8 L20 GPUs + transformers 4.45.0
run: |
pip3 install transformers==4.45.0
torchrun --nproc_per_node=8 -m pytest tests/model/test_transformers_ulysses.py
torchrun --nproc_per_node=8 -m pytest tests/models/test_transformers_ulysses.py
- name: Run distributed test
run: |
bash tests/distributed/run_all.sh
Expand Down Expand Up @@ -106,4 +106,4 @@ jobs:
- name: Running FSDP2 rmpad model tests on 8 L20 GPUs + latest flash_attn
run: |
pip3 install --upgrade flash_attn --no-build-isolation
STRATEGY=fsdp2 torchrun --nproc_per_node=8 tests/checkpoint/test_fsdp_ckpt.py
STRATEGY=fsdp2 torchrun --nproc_per_node=8 tests/utils/checkpoint/test_fsdp_ckpt.py
6 changes: 3 additions & 3 deletions .github/workflows/sgl.yml
Original file line number Diff line number Diff line change
Expand Up @@ -68,13 +68,13 @@ jobs:
pip3 install -e .[test,gpu,sglang] --no-deps
- name: Test the latest SGLang
run: |
cd tests/rollout
cd tests/workers/rollout
torchrun --nnodes=1 --nproc_per_node=4 $(which pytest) -s test_sglang_spmd.py
- name: Test the latest SGLang async
run: |
cd tests/rollout
cd tests/workers/rollout
torchrun --nnodes=1 --nproc_per_node=2 $(which pytest) -s test_sglang_async_spmd.py
- name: Test the latest SGLang Rollout async with tool
run: |
cd tests/rollout
cd tests/workers/rollout
torchrun --nnodes=1 --nproc_per_node=2 $(which pytest) -s test_sglang_async_rollout_w_tools.py
8 changes: 4 additions & 4 deletions .github/workflows/verl_unit_test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -43,9 +43,9 @@ jobs:
pip install -e .[test]
- name: Running test protocol.py
run: |
cd tests/verl
cd tests
pytest -s -x test_protocol.py
- name: Running utils tests
- name: running utils tests
run: |
cd tests/verl/utils
pytest -s -x --ignore=dataset/ --ignore=test_torch_functional.py .
cd tests/utils
pytest -s -x --ignore=dataset/ --ignore=checkpoint/ --ignore=test_flops_counter.py --ignore=test_torch_functional.py .
12 changes: 6 additions & 6 deletions .github/workflows/vllm.yml
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ on:
- "!**/*sglang*"
# Entrypoints
- ".github/workflows/vllm.yml"
- "tests/generation"
- "tests/e2e/generation"
- "tests/rollout"
- "verl/trainer/main_generation.py"
- "verl/trainer/config/generation.yaml"
Expand Down Expand Up @@ -72,26 +72,26 @@ jobs:
# Disable requests to avoid network errors
- name: Running vllm tests on 8 L20 GPUs
run: |
cd tests/rollout
cd tests/workers/rollout
torchrun --standalone --nnodes=1 --nproc_per_node=8 $(which pytest) -s test_vllm_hf_loader.py
- name: Test the latest vLLM
run: |
pip3 install --upgrade vllm==0.7.3
cd tests/rollout
cd tests/workers/rollout
torchrun --standalone --nnodes=1 --nproc_per_node=4 $(which pytest) -s test_vllm_spmd.py
- name: Run Qwen 0.5B generation test
run: |
cd tests/generation
cd tests/e2e/generation
export OUTPUT_PATH="${HOME}/data/gen/qwen_05_gen_test.parquet"
MODEL_ID=Qwen/Qwen2.5-0.5B-Instruct NGPUS_PER_NODE=4 GEN_TP=2 bash ./run_gen_qwen05.sh
rm -rf "${OUTPUT_PATH}"
- name: Run Qwen 0.5B generation test when world_size == 1
run: |
cd tests/generation
cd tests/e2e/generation
export OUTPUT_PATH="${HOME}/data/gen/qwen_05_gen_test.parquet"
MODEL_ID=Qwen/Qwen2.5-0.5B-Instruct NGPUS_PER_NODE=1 GEN_TP=1 bash ./run_gen_qwen05.sh
rm -rf "${OUTPUT_PATH}"
- name: Running multi-turn rollout tests on 8 L20 GPUs
run: |
pip3 install --upgrade vllm==0.8.3 tensordict==0.7.2
python3 tests/rollout/test_vllm_multi_turn.py
python3 tests/workers/rollout/test_vllm_multi_turn.py
3 changes: 0 additions & 3 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -109,9 +109,6 @@ ENV/
# Mac
.DS_Store

# output logs
tests/e2e/toy_examples/deepspeed/synchronous/output.txt

# vim
*.swp

Expand Down
21 changes: 21 additions & 0 deletions docs/api/trainer.rst
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
Trainers
=========================

Trainers drive the training loop. Introducing new trainer classes is encouraged when a new training paradigm is needed.

.. autosummary::
:nosignatures:

verl.trainer.ppo.ray_trainer.RayPPOTrainer


Core APIs
~~~~~~~~~~~~~~~~~

.. autoclass:: verl.trainer.ppo.ray_trainer.RayPPOTrainer

.. automodule:: verl.utils.tokenizer
:members: hf_tokenizer

.. automodule:: verl.single_controller
:members: Worker, WorkerGroup, ClassWithInitArgs, ResourcePool
4 changes: 2 additions & 2 deletions docs/perf/perf_tuning.rst
Original file line number Diff line number Diff line change
Expand Up @@ -51,12 +51,12 @@ Currently, for llama, mistral, gemma1 and qwen based models, users can enable `u
sequence packing implementation provided by transformers library.

For other models, transformers library may also support it but we haven't tested it yet.
Users can add the desired model config to the `test_transformer.py <https://github.com/volcengine/verl/blob/main/tests/model/test_transformer.py#L24>`_ file.
Users can add the desired model config to the `test_transformer.py <https://github.com/volcengine/verl/blob/main/tests/models/test_transformer.py#L24>`_ file.
And test its functionality by running the following command:

.. code-block:: bash

pytest -s tests/model/test_transformer.py
pytest -s tests/models/test_transformer.py

If the test passes, you can add your desired model into the model `registry.py <https://github.com/volcengine/verl/blob/main/verl/models/registry.py#L24>`_ file.
Then, you can enjoy the performance boost of sequence packing
Expand Down
File renamed without changes.
File renamed without changes.
4 changes: 2 additions & 2 deletions recipe/dapo/run_dapo_early_qwen2.5_32b.sh
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,7 @@ gen_tp=4

ray job submit --no-wait --runtime-env="${RUNTIME_ENV}" \
--working-dir "${WORKING_DIR}" \
-- python3 -m recipe.dapo.src.main_dapo \
-- python3 -m recipe.dapo.main_dapo \
data.train_files="${TRAIN_FILE}" \
data.val_files="${TEST_FILE}" \
data.prompt_key=prompt \
Expand Down Expand Up @@ -125,4 +125,4 @@ ray job submit --no-wait --runtime-env="${RUNTIME_ENV}" \
trainer.save_freq=5 \
trainer.total_epochs=1 \
trainer.default_local_dir="${CKPTS_DIR}" \
trainer.resume_mode=auto
trainer.resume_mode=auto
4 changes: 2 additions & 2 deletions recipe/dapo/run_dapo_qwen2.5_32b.sh
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,7 @@ gen_tp=4

ray job submit --no-wait --runtime-env="${RUNTIME_ENV}" \
--working-dir "${WORKING_DIR}" \
-- python3 -m recipe.dapo.src.main_dapo \
-- python3 -m recipe.dapo.main_dapo \
data.train_files="${TRAIN_FILE}" \
data.val_files="${TEST_FILE}" \
data.prompt_key=prompt \
Expand Down Expand Up @@ -127,4 +127,4 @@ ray job submit --no-wait --runtime-env="${RUNTIME_ENV}" \
trainer.save_freq=5 \
trainer.total_epochs=1 \
trainer.default_local_dir="${CKPTS_DIR}" \
trainer.resume_mode=auto
trainer.resume_mode=auto
4 changes: 2 additions & 2 deletions recipe/dapo/run_dapo_wo_ds_qwen2.5_32b.sh
Original file line number Diff line number Diff line change
Expand Up @@ -56,7 +56,7 @@ gen_tp=4

ray job submit --no-wait --runtime-env="${RUNTIME_ENV}" \
--working-dir "${WORKING_DIR}" \
-- python3 -m recipe.dapo.src.main_dapo \
-- python3 -m recipe.dapo.main_dapo \
data.train_files="${TRAIN_FILE}" \
data.val_files="${TEST_FILE}" \
data.prompt_key=prompt \
Expand Down Expand Up @@ -122,4 +122,4 @@ ray job submit --no-wait --runtime-env="${RUNTIME_ENV}" \
trainer.save_freq=5 \
trainer.total_epochs=1 \
trainer.default_local_dir="${CKPTS_DIR}" \
trainer.resume_mode=auto
trainer.resume_mode=auto
2 changes: 1 addition & 1 deletion recipe/dapo/test_dapo_7b.sh
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,7 @@ offload=False

ray job submit --no-wait --runtime-env="${RUNTIME_ENV}" \
--working-dir "${WORKING_DIR}" \
-- python3 -m recipe.dapo.src.main_dapo \
-- python3 -m recipe.dapo.main_dapo \
data.train_files="${TRAIN_FILE}" \
data.val_files="${TEST_FILE}" \
data.prompt_key=prompt \
Expand Down
2 changes: 1 addition & 1 deletion tests/e2e/run_dapo.sh
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,7 @@ gen_prompt_bsz=$((train_prompt_bsz * 4))

exp_name="$(basename "${MODEL_ID,,}")-dapo-minimal"

python3 -m recipe.dapo.src.main_dapo \
python3 -m recipe.dapo.main_dapo \
data.train_files="${HOME}/data/gsm8k/train.parquet" \
data.val_files="${HOME}/data/gsm8k/test.parquet" \
reward_model.reward_manager=dapo \
Expand Down
File renamed without changes.
File renamed without changes.
11 changes: 4 additions & 7 deletions tests/utility/test_timeout_decorator.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,17 +12,14 @@
# See the License for the specific language governing permissions and
# limitations under the License.

import time
import os
import sys
import multiprocessing
import queue
import pytest # Import pytest
from functools import wraps
import sys
import threading
import time

import pytest # Import pytest

from verl.utils.py_functional import timeout_limit as timeout
import threading

# --- Test Task Functions ---
TEST_TIMEOUT_SECONDS = 1.5 # Timeout duration for tests
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -12,9 +12,10 @@
# See the License for the specific language governing permissions and
# limitations under the License.

import pytest
import math
import json

import pytest

from verl.utils.flops_counter import FlopsCounter

VALID_CONFIG_TYPE = {"llama", "qwen2", "qwen3", "qwen3_moe", "deepseek_v3"}
Expand Down
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@
from openai.types.chat.chat_completion import ChatCompletion
from vllm.entrypoints.openai.protocol import ChatCompletionRequest, ChatCompletionResponse, ChatCompletionStreamResponse, ErrorResponse

from tests.rollout.async_rollout_utils import init_async_rollout_manager
from tests.workers.rollout.async_rollout_utils import init_async_rollout_manager
from verl.protocol import DataProto


Expand Down
File renamed without changes.
1 change: 1 addition & 0 deletions verl/single_controller/base/worker.py
Original file line number Diff line number Diff line change
Expand Up @@ -136,6 +136,7 @@ def _configure_before_init(self, register_center_name: str, rank: int):
def __init__(self, cuda_visible_devices=None) -> None:
# construct a meta from environment variable. Note that the import must be inside the class because it is executed remotely
import os

import torch
from packaging import version

Expand Down
4 changes: 2 additions & 2 deletions verl/utils/megatron_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,9 +31,9 @@
from megatron.core.utils import get_attr_wrapped_model
from transformers import PretrainedConfig

from verl.utils.torch_dtypes import PrecisionType
from verl.utils.model import normalize_model_name
import verl.utils.megatron.tensor_parallel as tp_utils
from verl.utils.model import normalize_model_name
from verl.utils.torch_dtypes import PrecisionType


def get_model_config(model):
Expand Down
2 changes: 1 addition & 1 deletion verl/utils/reward_score/math_dapo.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,9 +14,9 @@
# Adapted from https://github.com/EleutherAI/lm-evaluation-harness/blob/main/lm_eval/tasks/hendrycks_math/utils.py

import re
import signal
from typing import Optional


def last_boxed_only_string(string: str) -> Optional[str]:
"""Extract the last LaTeX boxed expression from a string.

Expand Down
3 changes: 2 additions & 1 deletion verl/utils/reward_score/prime_math/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,9 +28,10 @@
from pylatexenc import latex2text
from sympy.parsing import sympy_parser

from verl.utils.py_functional import timeout_limit

from . import math_normalize
from .grader import math_equal
from verl.utils.py_functional import timeout_limit

# import math_normalize
# from grader import math_equal
Expand Down
1 change: 0 additions & 1 deletion verl/utils/reward_score/prime_math/grader.py
Original file line number Diff line number Diff line change
Expand Up @@ -95,7 +95,6 @@
import contextlib
import math
import re
import signal
from math import isclose
from typing import Union

Expand Down
1 change: 1 addition & 0 deletions verl/workers/megatron_workers.py
Original file line number Diff line number Diff line change
Expand Up @@ -262,6 +262,7 @@ def _build_rollout(self, trust_remote_code=False):
log_gpu_memory_usage("After building sharding manager", logger=logger)
elif self.config.rollout.name == 'sglang':
from verl.workers.rollout.sglang_rollout import SGLangRollout

# NOTE(linjunrong): Due to recent fp8 support in SGLang. Now importing any symbol relate to SGLang's model_runner would check CUDA device capability.
# However, due to veRL's setting, the main process of ray can not find any CUDA device, which would potentially lead to:
# "RuntimeError: No CUDA GPUs are available".
Expand Down
Loading
Loading