Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
The table of contents is too big for display.
Diff view
Diff view
  •  
  •  
  •  
1 change: 1 addition & 0 deletions .github/workflows/sanity.yml
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@ jobs:
- name: Install the current repository
run: |
pip3 install torch torchvision --index-url https://download.pytorch.org/whl/cpu
pip3 install --no-deps "verl @ git+https://github.com/verl-project/verl.git@main"
pip3 install -r requirements.txt
pip3 install -r requirements-test.txt
pip3 install --no-deps -e .
Expand Down
3 changes: 2 additions & 1 deletion .github/workflows/type-coverage-check.yml
Original file line number Diff line number Diff line change
Expand Up @@ -21,11 +21,12 @@ jobs:
- name: Install dependencies
run: |
pip3 install torch torchvision --index-url https://download.pytorch.org/whl/cpu
pip3 install --no-deps "verl @ git+https://github.com/verl-project/verl.git@main"
pip3 install -r requirements.txt
pip3 install --no-deps -e .
- name: Run type annotation coverage check
run: |
python3 tests/special_sanity/type_coverage_check.py
- name: Run docstring coverage check
run: |
python3 tests/special_sanity/check_api_docs.py verl
python3 tests/special_sanity/check_api_docs.py verl_omni
2 changes: 1 addition & 1 deletion .pre-commit-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ repos:
- repo: local
hooks:
- id: autogen-trainer-cfg
name: Generate and verify verl/trainer/config/_generated_*.yaml
name: Generate and verify verl_omni/trainer/config/_generated_*.yaml
entry: scripts/generate_trainer_config.sh
language: script
pass_filenames: false
Expand Down
1 change: 0 additions & 1 deletion examples/flowgrpo_trainer/data_process/qwenimage_ocr.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,6 @@
import os

import datasets

from verl.utils.hdfs_io import copy, makedirs


Expand Down
6 changes: 3 additions & 3 deletions examples/flowgrpo_trainer/diffusers_impl/qwen_image.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,11 +22,11 @@
from diffusers.models.transformers.transformer_qwenimage import QwenImageTransformer2DModel
from diffusers.pipelines.qwenimage.pipeline_qwenimage import calculate_shift
from tensordict import TensorDict

from verl.models.diffusion_model import DiffusionModelBase
from verl.utils import tensordict_utils as tu
from verl.utils.device import get_device_name
from verl.workers.config import DiffusionModelConfig

from verl_omni.models.diffusion_model import DiffusionModelBase
from verl_omni.workers.config import DiffusionModelConfig

from ..scheduler import FlowMatchSDEDiscreteScheduler

Expand Down
1 change: 0 additions & 1 deletion examples/flowgrpo_trainer/reward_fn.py
Original file line number Diff line number Diff line change
Expand Up @@ -72,7 +72,6 @@ async def compute_score_ocr(
import re

import Levenshtein

from verl.utils.experimental.reward_utils import pil_image_to_base64
from verl.utils.ray_utils import get_event_loop

Expand Down
2 changes: 1 addition & 1 deletion examples/flowgrpo_trainer/run_qwen_image_ocr_lora.sh
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ reward_path=examples/flowgrpo_trainer/reward_fn.py
reward_model_name=$HOME/models/Qwen/Qwen3-VL-8B-Instruct


python3 -m verl.trainer.main_flowgrpo \
python3 -m verl_omni.trainer.main_flowgrpo \
algorithm.adv_estimator=flow_grpo \
data.train_files=$ocr_train_path \
data.val_files=$ocr_test_path \
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@
from vllm_omni.diffusion.models.qwen_image import QwenImagePipeline
from vllm_omni.diffusion.request import OmniDiffusionRequest

from verl.models.diffusion_model import VllmOmniPipelineBase
from verl_omni.models.diffusion_model import VllmOmniPipelineBase

from ..scheduler import FlowMatchSDEDiscreteScheduler

Expand Down
19 changes: 7 additions & 12 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ build-backend = "setuptools.build_meta"
# -------------------------------
[project]
name = "verl-omni"
# We'll mark the version as "dynamic" because it's read from the file "verl/version/version"
# We'll mark the version as "dynamic" because it's read from the file "verl_omni/version/version"
# (PEP 621 calls this "dynamic version").
# The actual version is specified in the [tool.setuptools.dynamic] section below.
dynamic = ["version", "dependencies", "optional-dependencies", "authors", "urls"]
Expand All @@ -33,7 +33,7 @@ line-length = 120
exclude = ["scripts/legacy_model_merger.py"]

[tool.ruff.lint]
isort = {known-first-party = ["verl"]}
isort = {known-first-party = ["verl_omni"]}
# c.f. https://github.com/vllm-project/vllm/blob/ce8d6b75fc0586045df75ee1568a5b5f9957251b/pyproject.toml
select = [
# pycodestyle
Expand Down Expand Up @@ -79,11 +79,7 @@ ignore_errors = true

[[tool.mypy.overrides]]
module = [
"verl.trainer.config.algorithm",
"verl.trainer.ppo.core_algos",
"verl.trainer.ppo.reward",
"verl.workers.reward_manager",
"verl.workers.reward_manager.*",
"verl_omni.trainer.config.algorithm",
]
ignore_errors = false

Expand All @@ -95,20 +91,19 @@ ignore_errors = false
# This corresponds to `include_package_data=True` in setup.py.
include-package-data = true

# We read the version from a file in 'verl/version/version'
# We read the version from a file in 'verl_omni/version/version'
[tool.setuptools.dynamic]
version = {file = "verl/version/version"}
version = {file = "verl_omni/version/version"}

# If you need to mimic `package_dir={'': '.'}`:
[tool.setuptools.package-dir]
"" = "."

# If you need to include specific non-Python data (like YAML files or version file):
# This is the rough equivalent of package_data={'': ['version/*'], 'verl_omni': ['trainer/config/*.yaml']}
[tool.setuptools.package-data]
verl = [
verl_omni = [
"version/*",
"trainer/config/*.yaml",
"trainer/config/*/*.yaml",
"experimental/*/config/*.yaml",
"trainer/config/*/*/*.yaml",
]
4 changes: 3 additions & 1 deletion requirements.txt
Original file line number Diff line number Diff line change
@@ -1,7 +1,10 @@
# requirements.txt records the full set of dependencies for development
verl>=0.7.1
Comment thread
zhtmike marked this conversation as resolved.
accelerate
cachetools
codetiming
datasets
diffusers>=0.37.1
dill
hydra-core
liger-kernel
Expand All @@ -16,7 +19,6 @@ ray[default]
tensordict>=0.8.0,<=0.10.0,!=0.9.0
torchdata
transformers
# vllm==0.8.4
wandb
packaging>=20.0
uvicorn
Expand Down
10 changes: 3 additions & 7 deletions scripts/generate_trainer_config.sh
Original file line number Diff line number Diff line change
Expand Up @@ -4,10 +4,6 @@ set -euox pipefail

# Define config specifications: "config_name:output_file:config_arg"
CONFIG_SPECS=(
"ppo_trainer:_generated_ppo_trainer.yaml:"
"ppo_trainer:_generated_ppo_megatron_trainer.yaml:model_engine=megatron"
"ppo_trainer:_generated_ppo_veomni_trainer.yaml:model_engine=veomni"
"ppo_trainer:_generated_ppo_torchtitan_trainer.yaml:model_engine=torchtitan"
"diffusion_trainer:_generated_diffusion_trainer.yaml:--config-name=diffusion_trainer.yaml"
)

Expand All @@ -16,12 +12,12 @@ generate_config() {
local output_file="$2"
local config_arg="$3"

local target_cfg="verl/trainer/config/${output_file}"
local target_cfg="verl_omni/trainer/config/${output_file}"
local tmp_header=$(mktemp)
local tmp_cfg=$(mktemp)

echo "# This reference configration yaml is automatically generated via 'scripts/generate_trainer_config.sh'" > "$tmp_header"
echo "# in which it invokes 'python3 scripts/print_cfg.py --cfg job ${config_arg}' to flatten the 'verl/trainer/config/${config_name}.yaml' config fields into a single file." >> "$tmp_header"
echo "# in which it invokes 'python3 scripts/print_cfg.py --cfg job ${config_arg}' to flatten the 'verl_omni/trainer/config/${config_name}.yaml' config fields into a single file." >> "$tmp_header"
echo "# Do not modify this file directly." >> "$tmp_header"
echo "# The file is usually only for reference and never used." >> "$tmp_header"
echo "" >> "$tmp_header"
Expand All @@ -43,7 +39,7 @@ done

for spec in "${CONFIG_SPECS[@]}"; do
IFS=':' read -r config_name output_file config_arg <<< "$spec"
target_cfg="verl/trainer/config/${output_file}"
target_cfg="verl_omni/trainer/config/${output_file}"
if ! git diff --exit-code -- "$target_cfg" >/dev/null; then
echo "✖ $target_cfg is out of date. Please regenerate via 'scripts/generate_trainer_config.sh' and commit the changes."
exit 1
Expand Down
8 changes: 2 additions & 6 deletions scripts/print_cfg.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,18 +17,14 @@
raise ImportError("Please install hydra-core via 'pip install hydra-core' and retry.") from e


@hydra.main(config_path="../verl/trainer/config", config_name="ppo_trainer", version_base=None)
@hydra.main(config_path="../verl_omni/trainer/config", config_name="diffusion_trainer", version_base=None)
def main(config):
"""Main entry point for PPO training with Hydra configuration management.
"""Main entry point for printing the resolved diffusion trainer config.

Args:
config: Hydra configuration dictionary containing training parameters.
"""
print(config)
from verl.utils.config import omega_conf_to_dataclass

profiler_config = omega_conf_to_dataclass(config.critic.profiler)
print(profiler_config)


if __name__ == "__main__":
Expand Down
40 changes: 11 additions & 29 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,13 +20,15 @@

version_folder = os.path.dirname(os.path.join(os.path.abspath(__file__)))

with open(os.path.join(version_folder, "verl/version/version")) as f:
with open(os.path.join(version_folder, "verl_omni/version/version")) as f:
__version__ = f.read().strip()

install_requires = [
"accelerate",
"cachetools",
"codetiming",
"datasets",
"diffusers",
"dill",
"hydra-core",
"numpy<2.0.0",
Expand All @@ -45,57 +47,37 @@
]

TEST_REQUIRES = ["pytest", "pre-commit", "py-spy", "pytest-asyncio", "pytest-rerunfailures"]
PRIME_REQUIRES = ["pyext"]
GEO_REQUIRES = ["mathruler", "torchvision", "qwen_vl_utils"]
GPU_REQUIRES = ["liger-kernel", "flash-attn"]
MATH_REQUIRES = ["math-verify"] # Add math-verify as an optional dependency
GPU_REQUIRES = ["flash-attn"]
VLLM_REQUIRES = ["tensordict>=0.8.0,<=0.10.0,!=0.9.0", "vllm>=0.8.5,<=0.12.0"]
Comment thread
zhtmike marked this conversation as resolved.
TRTLLM_REQUIRES = ["tensorrt-llm>=1.2.0rc6"]
SGLANG_REQUIRES = [
"tensordict>=0.8.0,<=0.10.0,!=0.9.0",
"sglang[srt,openai]==0.5.8",
"torch==2.9.1",
]
TRL_REQUIRES = ["trl<=0.9.6"]
MCORE_REQUIRES = ["mbridge"]
TRANSFERQUEUE_REQUIRES = ["TransferQueue==0.1.6"]

extras_require = {
"test": TEST_REQUIRES,
"prime": PRIME_REQUIRES,
"geo": GEO_REQUIRES,
"gpu": GPU_REQUIRES,
"math": MATH_REQUIRES,
"vllm": VLLM_REQUIRES,
"sglang": SGLANG_REQUIRES,
"trl": TRL_REQUIRES,
"mcore": MCORE_REQUIRES,
"trtllm": TRTLLM_REQUIRES,
"transferqueue": TRANSFERQUEUE_REQUIRES,
}


this_directory = Path(__file__).parent
long_description = (this_directory / "README.md").read_text()

setup(
name="verl",
name="verl-omni",
version=__version__,
package_dir={"": "."},
packages=find_packages(where="."),
url="https://github.com/verl-project/verl",
packages=find_packages(where=".", include=["verl_omni", "verl_omni.*"]),
url="https://github.com/verl-project/verl-omni",
license="Apache 2.0",
author="Bytedance - Seed - MLSys",
author_email="zhangchi.usc1992@bytedance.com, gmsheng@connect.hku.hk",
description="verl: Volcano Engine Reinforcement Learning for LLM",
author_email="yhuangch@cse.ust.hk",
description="verl-omni: Easy, fast, and stable RL training for diffusion and omni-modality models",
install_requires=install_requires,
extras_require=extras_require,
package_data={
"": ["version/*"],
"verl": [
"verl_omni": [
"trainer/config/*.yaml",
"trainer/config/*/*.yaml",
"experimental/*/config/*.yaml",
"trainer/config/*/*/*.yaml",
],
},
include_package_data=True,
Expand Down
7 changes: 7 additions & 0 deletions tests/experimental/agent_loop/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,13 @@
# limitations under the License.
import pytest

# TODO (mike): to be dropped once `verl` drops its legacy diffusion
# implementations.
try:
import verl_omni # noqa: F401
except ImportError:
pass


def pytest_configure(config):
config.addinivalue_line("markers", "vllm_omni: requires the vllm-omni package")
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,6 @@
import pytest
import ray
from omegaconf import DictConfig

from verl.experimental.agent_loop.agent_loop import AgentLoopManager
from verl.protocol import DataProto

Expand Down Expand Up @@ -59,7 +58,7 @@ def _create_tp_compatible_model(parent_dir, src_model_path, num_attention_heads=
def init_config() -> DictConfig:
from hydra import compose, initialize_config_dir

with initialize_config_dir(config_dir=os.path.abspath("verl/trainer/config")):
with initialize_config_dir(config_dir=os.path.abspath("verl_omni/trainer/config")):
config = compose(config_name="diffusion_trainer")

base_model_path = os.path.expanduser("~/models/tiny-random/Qwen-Image")
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,6 @@
import ray
import torch
from hydra import compose, initialize_config_dir

from verl.experimental.reward_loop import RewardLoopManager
from verl.protocol import DataProto
from verl.utils import hf_tokenizer
Expand Down Expand Up @@ -65,8 +64,8 @@ def test_reward_model_genrm():
}
}
)
with initialize_config_dir(config_dir=os.path.abspath("verl/trainer/config")):
config = compose(config_name="ppo_trainer")
with initialize_config_dir(config_dir=os.path.abspath("verl_omni/trainer/config")):
config = compose(config_name="diffusion_trainer")

rollout_model_name = os.path.expanduser("~/models/tiny-random/Qwen-Image")
reward_model_name = os.path.expanduser("~/models/tiny-random/qwen3-vl")
Expand Down Expand Up @@ -118,8 +117,8 @@ def test_rule_reward():
}
}
)
with initialize_config_dir(config_dir=os.path.abspath("verl/trainer/config")):
config = compose(config_name="ppo_trainer")
with initialize_config_dir(config_dir=os.path.abspath("verl_omni/trainer/config")):
config = compose(config_name="diffusion_trainer")

rollout_model_name = os.path.expanduser("~/models/tiny-random/Qwen-Image")

Expand Down
16 changes: 8 additions & 8 deletions tests/models/test_diffusers_fsdp_engine.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,16 +19,17 @@
import pytest
import ray
import torch

from verl import DataProto
from verl.models.diffusion_model import build_scheduler
from verl.single_controller.ray import RayClassWithInitArgs, RayResourcePool, RayWorkerGroup
from verl.utils import tensordict_utils as tu
from verl.workers.config import DiffusionModelConfig, FSDPDiffusionActorConfig, TrainingWorkerConfig
from verl.workers.engine_workers import TrainingWorker
from verl.workers.utils.losses import diffusion_loss
from verl.workers.config import TrainingWorkerConfig
Comment thread
zhtmike marked this conversation as resolved.
from verl.workers.utils.padding import embeds_padding_2_no_padding

from verl_omni.models.diffusion_model import build_scheduler
from verl_omni.workers.config import DiffusionModelConfig, FSDPDiffusionActorConfig
from verl_omni.workers.engine_workers import TrainingWorker
from verl_omni.workers.utils.losses import diffusion_loss

EXTERNAL_LIB = "examples.flowgrpo_trainer.diffusers_impl.qwen_image"


Expand All @@ -44,10 +45,9 @@ def create_training_config(model_type, strategy, device_count, model):

if strategy in ["fsdp", "fsdp2"]:
from hydra import compose, initialize_config_dir

from verl.utils.config import omega_conf_to_dataclass

with initialize_config_dir(config_dir=os.path.abspath("verl/trainer/config/diffusion/model")):
with initialize_config_dir(config_dir=os.path.abspath("verl_omni/trainer/config/diffusion/model")):
cfg = compose(
config_name="diffusion_model",
overrides=[
Expand All @@ -63,7 +63,7 @@ def create_training_config(model_type, strategy, device_count, model):
)
model_config: DiffusionModelConfig = omega_conf_to_dataclass(cfg)

with initialize_config_dir(config_dir=os.path.abspath("verl/trainer/config/diffusion/actor")):
with initialize_config_dir(config_dir=os.path.abspath("verl_omni/trainer/config/diffusion/actor")):
cfg = compose(
config_name="dp_diffusion_actor",
overrides=[
Expand Down
Loading
Loading