verl-project · SamitHuang · Apr 22, 2026 · Apr 22, 2026 · Apr 22, 2026 · Apr 22, 2026
@@ -0,0 +1,39 @@
+### What does this PR do?
+
+> Add a **concise** overview of what this PR aims to achieve or accomplish. Reference related GitHub issues and PRs that help with the review.
+
+### Checklist Before Starting
+
+- [ ] Search for similar PRs. Paste at least one query link here: ...
+- [ ] Format the PR title as `[{modules}] {type}: {description}` (This will be checked by the CI)
+  - `{modules}` include `fsdp`, `vllm_omni`, `rollout`, `trainer`, `ci`, `training_utils`, `recipe`, `ray`, `worker`, `single_controller`, `misc`, `perf`, `model`, `algo`, `env`, `tool`, `ckpt`, `doc`, `data`, `cfg`, `reward`, `diffusion`, `omni`, `tests`, `docker`
+  - If this PR involves multiple modules, separate them with `,` like `[diffusion, doc]`
+  - `{type}` is one of `feat`, `fix`, `refactor`, `chore`, `test`
+  - If this PR breaks any API (CLI arguments, config, function signature, etc.), add `[BREAKING]` to the beginning of the title.
+  - Example: `[BREAKING][diffusion, fsdp] feat: new rollout scheduler`
+
+### Test
+
+> For changes that cannot be tested by CI (e.g., algorithm implementation, new model support), validate by experiment(s) and show results like training curve plots, evaluation results, etc.
+
+### API and Usage Example
+
+> Demonstrate how the API changes if any, and provide usage example(s) if possible.
+
+```python
+# Add code snippet or script demonstrating how to use this
+```
+
+### Design & Code Changes
+
+> Demonstrate the high-level design if this PR is complex, and list the specific changes.
+
+### Checklist Before Submitting
+
+> [!IMPORTANT]
+> Please check all the following items before requesting a review, otherwise the reviewer might deprioritize this PR for review.
+
+- [ ] Read the [Contributing Guide](https://github.com/verl-project/verl-omni/blob/main/CONTRIBUTING.md).
+- [ ] Apply [pre-commit checks](https://github.com/verl-project/verl-omni/blob/main/CONTRIBUTING.md#code-linting-and-formatting): `pre-commit install && pre-commit run --all-files --show-diff-on-failure --color=always`
+- [ ] Add / Update the documentation.
+- [ ] Add unit or end-to-end test(s) to [the CI workflow](https://github.com/verl-project/verl-omni/tree/main/.github/workflows) to cover all the code. If not feasible, explain why: ...
@@ -0,0 +1,26 @@
+# Runs the repository's PR title and PR description checkers whenever a pull
+# request is opened, edited, or receives new commits.
+name: PR title and description check
+
+on:
+  pull_request:
+    types: [opened, edited, synchronize]
+
+# Least privilege: the visible steps only check out the repository and run
+# scripts from it. Widen this if those scripts ever need to write via the API.
+permissions:
+  contents: read
+
+jobs:
+  check-title:
+    runs-on: ubuntu-latest
+    steps:
+      - name: Checkout code
+        uses: actions/checkout@v4
+
+      - name: Set up Python
+        uses: actions/setup-python@v5
+        with:
+          python-version: '3.11'
+
+      - name: Run PR title checker
+        run: python3 tests/special_sanity/check_pr_title.py
+        env:
+          # The title is handed over as an environment variable instead of
+          # being interpolated into the command line.
+          PR_TITLE: ${{ github.event.pull_request.title }}
+
+      - name: Run PR description checker
+        run: python3 tests/special_sanity/check_pr_description.py
+        env:
+          PR_TITLE: ${{ github.event.pull_request.title }}
+          # NOTE(review): GITHUB_EVENT_PATH is also a default runner variable,
+          # so this explicit mapping looks redundant -- confirm before removing.
+          GITHUB_EVENT_PATH: ${{ github.event_path }}
@@ -0,0 +1,45 @@
+# Sanity workflow: installs the repository (with CPU wheels of torch) and runs
+# the tests under tests/special_sanity plus the imported-docs validator.
+name: sanity
+
+on:
+  push:
+    branches: [main]
+  pull_request:
+    branches: [main]
+    paths:
+      - '**/*.py'
+      - '.github/workflows/sanity.yml'
+      - 'tests/special_sanity/**'
+
+# One active run per workflow and ref; superseded runs are cancelled on every
+# ref except main.
+concurrency:
+  group: ${{ github.workflow }}-${{ github.ref }}
+  cancel-in-progress: ${{ github.ref != 'refs/heads/main' }}
+
+permissions:
+  contents: read
+
+jobs:
+  sanity:
+    runs-on: ubuntu-latest
+    timeout-minutes: 5
+    strategy:
+      matrix:
+        # Quoted so the version stays the string "3.10" rather than a float.
+        python-version:
+          - "3.10"
+    steps:
+      - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
+      - name: Set up Python ${{ matrix.python-version }}
+        uses: actions/setup-python@0b93645e9fea7318ecaed2b359559ac225c90a2b # v5.3.0
+        with:
+          python-version: ${{ matrix.python-version }}
+      - name: Install the current repository
+        # torch/torchvision come from the CPU wheel index; the package itself
+        # is installed editable without re-resolving dependencies.
+        run: |
+          pip3 install torch torchvision --index-url https://download.pytorch.org/whl/cpu
+          pip3 install -r requirements.txt
+          pip3 install -r requirements-test.txt
+          pip3 install --no-deps -e .
+      - name: Run sanity test
+        run: pytest -s -x tests/special_sanity
+      - name: Assert documentation requirement for functions
+        run: |
+          python3 tests/special_sanity/validate_imported_docs.py
@@ -0,0 +1,31 @@
+# Runs the type-annotation coverage check and the docstring coverage check on
+# pull requests that touch Python files or this workflow file.
+name: Type Annotation and Docstring Coverage
+
+on:
+  pull_request:
+    paths:
+      - '**/*.py'
+      - '.github/workflows/type-coverage-check.yml'
+
+# Least privilege: the visible steps only check out the repository and run
+# scripts from it. Widen this if those scripts ever need to write via the API.
+permissions:
+  contents: read
+
+jobs:
+  type-coverage-check:
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v4
+        with:
+          fetch-depth: 0  # Important: fetch full history so `origin/main` is available
+      - name: Set up Python
+        uses: actions/setup-python@v5
+        with:
+          python-version: '3.10'
+
+      - name: Install dependencies
+        # torch/torchvision come from the CPU wheel index; the package itself
+        # is installed editable without re-resolving dependencies.
+        run: |
+          pip3 install torch torchvision --index-url https://download.pytorch.org/whl/cpu
+          pip3 install -r requirements.txt
+          pip3 install --no-deps -e .
+      - name: Run type annotation coverage check
+        run: |
+          python3 tests/special_sanity/type_coverage_check.py
+      - name: Run docstring coverage check
+        run: |
+          python3 tests/special_sanity/check_api_docs.py verl
@@ -0,0 +1 @@
+drop later
@@ -0,0 +1,16 @@
+# Copyright 2026 Bytedance Ltd. and/or its affiliates
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+from .qwen_image import QwenImage
+
+__all__ = ["QwenImage"]
@@ -23,7 +23,7 @@
 from diffusers.pipelines.qwenimage.pipeline_qwenimage import calculate_shift
 from tensordict import TensorDict
 
-from verl.models.diffusers_model import DiffusionModelBase
+from verl.models.diffusion_model import DiffusionModelBase
 from verl.utils import tensordict_utils as tu
 from verl.utils.device import get_device_name
 from verl.workers.config import DiffusionModelConfig
@@ -78,7 +78,7 @@ def prepare_model_inputs(
         vae_scale_factor = tu.get_non_tensor_data(data=micro_batch, key="vae_scale_factor", default=None)
         img_shapes = [[(1, height // vae_scale_factor // 2, width // vae_scale_factor // 2)]] * latents.shape[0]
 
-        guidance_scale = model_config.extra_configs.get("guidance_scale", None)
+        guidance_scale = model_config.guidance_scale
         if getattr(module.config, "guidance_embeds", False):
             guidance = torch.full([1], guidance_scale, device=timesteps.device, dtype=torch.float32)
         else:
@@ -125,7 +125,7 @@ def forward_and_sample_previous_step(
         timesteps = scheduler_inputs["all_timesteps"]
 
         noise_pred = module(**model_inputs)[0]
-        true_cfg_scale = model_config.extra_configs.get("true_cfg_scale", 1.0)
+        true_cfg_scale = model_config.true_cfg_scale
         if true_cfg_scale > 1.0:
             assert negative_model_inputs is not None
             neg_noise_pred = module(**negative_model_inputs)[0]
@@ -138,9 +138,9 @@ def forward_and_sample_previous_step(
             sample=latents[:, step].float(),
             model_output=noise_pred.float(),
             timestep=timesteps[:, step],
-            noise_level=model_config.extra_configs.get("noise_level", None),
+            noise_level=model_config.algo.noise_level,
             prev_sample=latents[:, step + 1].float(),
-            sde_type=model_config.extra_configs.get("sde_type", None),
+            sde_type=model_config.algo.sde_type,
             return_logprobs=True,
         )
         return log_prob, prev_sample_mean, std_dev_t
@@ -19,7 +19,7 @@ python3 -m verl.trainer.main_flowgrpo \
     data.max_prompt_length=256 \
     actor_rollout_ref.model.path=$HOME/models/Qwen/Qwen-Image \
     actor_rollout_ref.model.tokenizer_path=$HOME/models/Qwen/Qwen-Image/tokenizer \
-    actor_rollout_ref.model.external_lib="examples.flowgrpo_trainer.diffusers.qwen_image" \
+    actor_rollout_ref.model.external_lib="examples.flowgrpo_trainer.diffusers_impl" \
     actor_rollout_ref.model.lora_rank=64 \
     actor_rollout_ref.model.lora_alpha=128 \
     actor_rollout_ref.model.target_modules="['to_q','to_k','to_v','to_out.0','add_q_proj','add_k_proj','add_v_proj','to_add_out','img_mlp.net.0.proj','img_mlp.net.2','txt_mlp.net.0.proj','txt_mlp.net.2']" \
@@ -30,24 +30,23 @@ python3 -m verl.trainer.main_flowgrpo \
     actor_rollout_ref.actor.fsdp_config.param_offload=True \
     actor_rollout_ref.actor.fsdp_config.optimizer_offload=True \
     actor_rollout_ref.actor.fsdp_config.model_dtype=bfloat16 \
-    actor_rollout_ref.actor.policy_loss.loss_mode=flow_grpo \
+    actor_rollout_ref.actor.diffusion_loss.loss_mode=flow_grpo \
     actor_rollout_ref.rollout.log_prob_micro_batch_size_per_gpu=32 \
     actor_rollout_ref.rollout.tensor_model_parallel_size=1 \
     actor_rollout_ref.rollout.name=$ENGINE \
     actor_rollout_ref.rollout.n=16 \
-    actor_rollout_ref.rollout.agent.default_agent_loop=diffusion_single_turn_agent \
     actor_rollout_ref.rollout.agent.num_workers=4 \
     actor_rollout_ref.rollout.load_format=safetensors \
     actor_rollout_ref.rollout.layered_summon=True \
+    actor_rollout_ref.rollout.true_cfg_scale=4.0 \
+    actor_rollout_ref.rollout.max_sequence_length=256 \
+    actor_rollout_ref.rollout.algo.noise_level=1.2 \
+    actor_rollout_ref.rollout.algo.sde_type="sde" \
+    actor_rollout_ref.rollout.algo.sde_window_size=2 \
+    actor_rollout_ref.rollout.algo.sde_window_range="[0,5]" \
     actor_rollout_ref.rollout.val_kwargs.num_inference_steps=50 \
-    +actor_rollout_ref.rollout.extra_configs.true_cfg_scale=4.0 \
-    +actor_rollout_ref.rollout.extra_configs.noise_level=1.2 \
-    +actor_rollout_ref.rollout.extra_configs.sde_type="sde" \
-    +actor_rollout_ref.rollout.extra_configs.sde_window_size=2 \
-    +actor_rollout_ref.rollout.extra_configs.sde_window_range="[0,5]" \
-    +actor_rollout_ref.rollout.extra_configs.max_sequence_length=256 \
-    +actor_rollout_ref.rollout.val_kwargs.extra_configs.noise_level=0.0 \
-    +actor_rollout_ref.rollout.engine_kwargs.vllm_omni.custom_pipeline=examples.flowgrpo_trainer.vllm_omni.pipeline_qwenimage.QwenImagePipelineWithLogProb \
+    actor_rollout_ref.rollout.val_kwargs.algo.noise_level=0.0 \
+    actor_rollout_ref.rollout.external_lib=examples.flowgrpo_trainer.vllm_omni_impl \
     actor_rollout_ref.ref.log_prob_micro_batch_size_per_gpu=32 \
     reward.num_workers=4 \
     reward.reward_manager.name=visual \

@@ -0,0 +1,16 @@
+# Copyright 2026 Bytedance Ltd. and/or its affiliates
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+from .pipeline_qwenimage import QwenImagePipelineWithLogProb
+
+__all__ = ["QwenImagePipelineWithLogProb"]
@@ -20,6 +20,8 @@
 from vllm_omni.diffusion.models.qwen_image import QwenImagePipeline
 from vllm_omni.diffusion.request import OmniDiffusionRequest
 
+from verl.models.diffusion_model import VllmOmniPipelineBase
+
 from ..scheduler import FlowMatchSDEDiscreteScheduler
 
 
@@ -35,6 +37,7 @@ def _coalesce_not_none(value, default):
 
 # Custom pipeline class for QwenImage that returns log probabilities during the diffusion process.
 # This is compatible with API of vllm-omni custom pipeline
+@VllmOmniPipelineBase.register("QwenImagePipeline")
 class QwenImagePipelineWithLogProb(QwenImagePipeline):
     def __init__(self, *, od_config: OmniDiffusionConfig, prefix: str = ""):
         super().__init__(od_config=od_config, prefix=prefix)

@@ -0,0 +1,114 @@
+# -------------------------------
+# build-system
+# -------------------------------
+[build-system]
+requires = [
+    "setuptools>=61.0",
+    "wheel"
+]
+build-backend = "setuptools.build_meta"
+
+# -------------------------------
+# project (PEP 621 metadata)
+# -------------------------------
+[project]
+name = "verl-omni"
+# The version is marked as "dynamic" because it is read from the file "verl/version/version"
+# (PEP 621 calls this a "dynamic" field).
+# The actual version source is configured in the [tool.setuptools.dynamic] section below.
+dynamic = ["version", "dependencies", "optional-dependencies", "authors", "urls"]
+
+description = "verl-omni: Easy, fast, and stable RL training for diffusion and omni-modality models"
+license = {text = "Apache-2.0"}
+readme = {file = "README.md", content-type = "text/markdown"}
+requires-python = ">=3.10"
+
+# -------------------------------
+# tool.ruff - Linting configuration
+# -------------------------------
+[tool.ruff]
+# Note: While the formatter will attempt to format lines such that they remain within the line-length,
+# it isn't a hard upper bound, and formatted lines may exceed the line-length.
+line-length = 120
+exclude = ["scripts/legacy_model_merger.py"]
+
+[tool.ruff.lint]
+isort = {known-first-party = ["verl"]}
+# c.f. https://github.com/vllm-project/vllm/blob/ce8d6b75fc0586045df75ee1568a5b5f9957251b/pyproject.toml
+select = [
+    # pycodestyle
+    "E",
+    # Pyflakes
+    "F",
+    # pyupgrade
+    "UP",
+    # flake8-bugbear
+    "B",
+    # isort
+    "I",
+    "G",
+]
+ignore = [
+    # star imports
+    "F405", "F403",
+    # lambda expression assignment
+    "E731",
+    # Loop control variable not used within loop body
+    "B007",
+    # f-string format
+    "UP032",
+    # `.log()` statement uses f-string
+    "G004",
+    # X | None for type annotations
+    "UP045",
+    # deprecated import
+    "UP035",
+]
+
+# -------------------------------
+# tool.mypy - typechecking config
+# -------------------------------
+[tool.mypy]
+pretty            = true
+ignore_missing_imports = true
+explicit_package_bases = true
+follow_imports = "skip"
+
+# Blanket silence
+ignore_errors = true
+
+[[tool.mypy.overrides]]
+module = [
+"verl.trainer.config.algorithm",
+"verl.trainer.ppo.core_algos",
+"verl.trainer.ppo.reward",
+"verl.workers.reward_manager",
+"verl.workers.reward_manager.*",
+]
+ignore_errors = false
+
+# -------------------------------
+# tool.setuptools - Additional config
+# -------------------------------
+[tool.setuptools]
+# True means `setuptools` will attempt to include all relevant files in package_data automatically.
+# This corresponds to `include_package_data=True` in setup.py.
+include-package-data = true
+
+# We read the version from a file in 'verl/version/version'
+[tool.setuptools.dynamic]
+version = {file = "verl/version/version"}
+
+# If you need to mimic `package_dir={'': '.'}`:
+[tool.setuptools.package-dir]
+"" = "."
+
+# If you need to include specific non-Python data (like YAML files or version file):
+# This is the rough equivalent of package_data={'': ['version/*'], 'verl': ['trainer/config/*.yaml']}
+[tool.setuptools.package-data]
+verl = [
+  "version/*",
+  "trainer/config/*.yaml",
+  "trainer/config/*/*.yaml",
+  "experimental/*/config/*.yaml",
+]