diff --git a/.github/workflows/dataset.yml b/.github/workflows/dataset.yml index e99b73432e1..7ab18eca046 100644 --- a/.github/workflows/dataset.yml +++ b/.github/workflows/dataset.yml @@ -49,10 +49,10 @@ jobs: run: | [ ! -d "$HOME/verl-data" ] && git clone --depth 1 https://github.com/eric-haibin-lin/verl-data ~/verl-data python3 examples/data_preprocess/geo3k.py - pytest -s -x tests/verl/utils/dataset/test_rl_dataset.py - pytest -s -x tests/verl/utils/dataset/test_sft_dataset.py - pytest -s -x tests/verl/utils/test_import_utils.py - # pytest -s -x tests/verl/utils/dataset/test_rm_dataset.py + pytest -s -x tests/utils/dataset/test_rl_dataset.py + pytest -s -x tests/utils/dataset/test_sft_dataset.py + pytest -s -x tests/utils/test_import_utils.py + # pytest -s -x tests/utils/dataset/test_rm_dataset.py - name: Running ray test using cupy (move it to L20 when dockerfile ready) run: | cd tests/ray_gpu diff --git a/.github/workflows/model.yml b/.github/workflows/model.yml index 2013f97da47..6193321a3e7 100644 --- a/.github/workflows/model.yml +++ b/.github/workflows/model.yml @@ -15,8 +15,8 @@ on: - "verl/**/*.py" # Entrypoints - ".github/workflows/model.yml" - - "tests/checkpoint/test_fsdp_ckpt.py" - - "tests/model/test_transformers_ulysses.py" + - "tests/utils/checkpoint/test_fsdp_ckpt.py" + - "tests/models/test_transformers_ulysses.py" - "tests/distributed/run_all.sh" # Declare permissions just read content. 
@@ -46,37 +46,37 @@ jobs: pip3 install --upgrade transformers - name: Running rmpad model tests on 8 L20 GPUs + flash_attn 2.5.8 run: | - pytest -s tests/model/test_transformer.py + pytest -s tests/models/test_transformer.py - name: Running rmpad model tests on 8 L20 GPUs + latest flash_attn run: | pip3 install --upgrade flash_attn --no-build-isolation - pytest -s tests/model/test_transformer.py + pytest -s tests/models/test_transformer.py - name: Running FSDP rmpad model tests on 8 L20 GPUs + latest flash_attn run: | - STRATEGY=fsdp torchrun --nproc_per_node=8 tests/checkpoint/test_fsdp_ckpt.py + STRATEGY=fsdp torchrun --nproc_per_node=8 tests/utils/checkpoint/test_fsdp_ckpt.py - name: Running transformers ulysses tests on 8 L20 GPUs + latest transformers run: | - torchrun --nproc_per_node=8 -m pytest tests/model/test_transformers_ulysses.py + torchrun --nproc_per_node=8 -m pytest tests/models/test_transformers_ulysses.py - name: Running transformers ulysses tests on 8 L20 GPUs + transformers 4.49.0 run: | pip3 install transformers==4.49.0 - torchrun --nproc_per_node=8 -m pytest tests/model/test_transformers_ulysses.py + torchrun --nproc_per_node=8 -m pytest tests/models/test_transformers_ulysses.py - name: Running transformers ulysses tests on 8 L20 GPUs + transformers 4.48.0 run: | pip3 install transformers==4.48.0 - torchrun --nproc_per_node=8 -m pytest tests/model/test_transformers_ulysses.py + torchrun --nproc_per_node=8 -m pytest tests/models/test_transformers_ulysses.py - name: Running transformers ulysses tests on 8 L20 GPUs + transformers 4.47.0 run: | pip3 install transformers==4.47.0 - torchrun --nproc_per_node=8 -m pytest tests/model/test_transformers_ulysses.py + torchrun --nproc_per_node=8 -m pytest tests/models/test_transformers_ulysses.py - name: Running transformers ulysses tests on 8 L20 GPUs + transformers 4.46.0 run: | pip3 install transformers==4.46.0 - torchrun --nproc_per_node=8 -m pytest tests/model/test_transformers_ulysses.py + torchrun 
--nproc_per_node=8 -m pytest tests/models/test_transformers_ulysses.py - name: Running transformers ulysses tests on 8 L20 GPUs + transformers 4.45.0 run: | pip3 install transformers==4.45.0 - torchrun --nproc_per_node=8 -m pytest tests/model/test_transformers_ulysses.py + torchrun --nproc_per_node=8 -m pytest tests/models/test_transformers_ulysses.py - name: Run distributed test run: | bash tests/distributed/run_all.sh @@ -106,4 +106,4 @@ jobs: - name: Running FSDP2 rmpad model tests on 8 L20 GPUs + latest flash_attn run: | pip3 install --upgrade flash_attn --no-build-isolation - STRATEGY=fsdp2 torchrun --nproc_per_node=8 tests/checkpoint/test_fsdp_ckpt.py + STRATEGY=fsdp2 torchrun --nproc_per_node=8 tests/utils/checkpoint/test_fsdp_ckpt.py diff --git a/.github/workflows/sgl.yml b/.github/workflows/sgl.yml index 510847f48c5..6be11e8dda4 100644 --- a/.github/workflows/sgl.yml +++ b/.github/workflows/sgl.yml @@ -68,13 +68,13 @@ jobs: pip3 install -e .[test,gpu,sglang] --no-deps - name: Test the latest SGLang run: | - cd tests/rollout + cd tests/workers/rollout torchrun --nnodes=1 --nproc_per_node=4 $(which pytest) -s test_sglang_spmd.py - name: Test the latest SGLang async run: | - cd tests/rollout + cd tests/workers/rollout torchrun --nnodes=1 --nproc_per_node=2 $(which pytest) -s test_sglang_async_spmd.py - name: Test the latest SGLang Rollout async with tool run: | - cd tests/rollout + cd tests/workers/rollout torchrun --nnodes=1 --nproc_per_node=2 $(which pytest) -s test_sglang_async_rollout_w_tools.py diff --git a/.github/workflows/verl_unit_test.yml b/.github/workflows/verl_unit_test.yml index 3583292082e..2e0b147efcf 100644 --- a/.github/workflows/verl_unit_test.yml +++ b/.github/workflows/verl_unit_test.yml @@ -43,9 +43,9 @@ jobs: pip install -e .[test] - name: Running test protocol.py run: | - cd tests/verl + cd tests pytest -s -x test_protocol.py - - name: Running utils tests + - name: running utils tests run: | - cd tests/verl/utils - pytest -s -x 
--ignore=dataset/ --ignore=test_torch_functional.py . + cd tests/utils + pytest -s -x --ignore=dataset/ --ignore=checkpoint/ --ignore=test_flops_counter.py --ignore=test_torch_functional.py . diff --git a/.github/workflows/vllm.yml b/.github/workflows/vllm.yml index 295eb662321..e6cf582e0df 100644 --- a/.github/workflows/vllm.yml +++ b/.github/workflows/vllm.yml @@ -28,7 +28,7 @@ on: - "!**/*sglang*" # Entrypoints - ".github/workflows/vllm.yml" - - "tests/generation" + - "tests/e2e/generation" - "tests/rollout" - "verl/trainer/main_generation.py" - "verl/trainer/config/generation.yaml" @@ -72,26 +72,26 @@ jobs: # Disable requests to avoid network errors - name: Running vllm tests on 8 L20 GPUs run: | - cd tests/rollout + cd tests/workers/rollout torchrun --standalone --nnodes=1 --nproc_per_node=8 $(which pytest) -s test_vllm_hf_loader.py - name: Test the latest vLLM run: | pip3 install --upgrade vllm==0.7.3 - cd tests/rollout + cd tests/workers/rollout torchrun --standalone --nnodes=1 --nproc_per_node=4 $(which pytest) -s test_vllm_spmd.py - name: Run Qwen 0.5B generation test run: | - cd tests/generation + cd tests/e2e/generation export OUTPUT_PATH="${HOME}/data/gen/qwen_05_gen_test.parquet" MODEL_ID=Qwen/Qwen2.5-0.5B-Instruct NGPUS_PER_NODE=4 GEN_TP=2 bash ./run_gen_qwen05.sh rm -rf "${OUTPUT_PATH}" - name: Run Qwen 0.5B generation test when world_size == 1 run: | - cd tests/generation + cd tests/e2e/generation export OUTPUT_PATH="${HOME}/data/gen/qwen_05_gen_test.parquet" MODEL_ID=Qwen/Qwen2.5-0.5B-Instruct NGPUS_PER_NODE=1 GEN_TP=1 bash ./run_gen_qwen05.sh rm -rf "${OUTPUT_PATH}" - name: Running multi-turn rollout tests on 8 L20 GPUs run: | pip3 install --upgrade vllm==0.8.3 tensordict==0.7.2 - python3 tests/rollout/test_vllm_multi_turn.py + python3 tests/workers/rollout/test_vllm_multi_turn.py diff --git a/.gitignore b/.gitignore index 98df3516ee6..f444f0e4c9d 100644 --- a/.gitignore +++ b/.gitignore @@ -109,9 +109,6 @@ ENV/ # Mac .DS_Store -# output logs 
-tests/e2e/toy_examples/deepspeed/synchronous/output.txt - # vim *.swp diff --git a/docs/api/trainer.rst b/docs/api/trainer.rst new file mode 100644 index 00000000000..d890b7341c6 --- /dev/null +++ b/docs/api/trainer.rst @@ -0,0 +1,21 @@ +Trainers +========================= + +Trainers drive the training loop. Introducing new trainer classes for new training paradigms is encouraged. + +.. autosummary:: + :nosignatures: + + verl.trainer.ppo.ray_trainer.RayPPOTrainer + + +Core APIs +~~~~~~~~~~~~~~~~~ + +.. autoclass:: verl.trainer.ppo.ray_trainer.RayPPOTrainer + +.. automodule:: verl.utils.tokenizer + :members: hf_tokenizer + +.. automodule:: verl.single_controller + :members: Worker, WorkerGroup, ClassWithInitArgs, ResourcePool diff --git a/docs/perf/perf_tuning.rst b/docs/perf/perf_tuning.rst index fa2d416d69f..9a509ce1102 100644 --- a/docs/perf/perf_tuning.rst +++ b/docs/perf/perf_tuning.rst @@ -51,12 +51,12 @@ Currently, for llama, mistral, gemma1 and qwen based models, users can enable `u sequence packing implementation provided by transformers library. For other models, transformers library may also support it but we haven't tested it yet. -Users can add the desired model config to the `test_transformer.py `_ file. +Users can add the desired model config to the `test_transformer.py `_ file. And test its functionaility by running the following command: .. code-block:: bash - pytest -s tests/model/test_transformer.py + pytest -s tests/models/test_transformer.py If the test passes, you can add your desired model into the model `registry.py `_ file. 
Then, you can enjoy the performance boost of sequence packing diff --git a/recipe/dapo/src/config/dapo_trainer.yaml b/recipe/dapo/config/dapo_trainer.yaml similarity index 100% rename from recipe/dapo/src/config/dapo_trainer.yaml rename to recipe/dapo/config/dapo_trainer.yaml diff --git a/recipe/dapo/src/dapo_ray_trainer.py b/recipe/dapo/dapo_ray_trainer.py similarity index 100% rename from recipe/dapo/src/dapo_ray_trainer.py rename to recipe/dapo/dapo_ray_trainer.py diff --git a/recipe/dapo/src/main_dapo.py b/recipe/dapo/main_dapo.py similarity index 100% rename from recipe/dapo/src/main_dapo.py rename to recipe/dapo/main_dapo.py diff --git a/recipe/dapo/run_dapo_early_qwen2.5_32b.sh b/recipe/dapo/run_dapo_early_qwen2.5_32b.sh index 8ddfcb9445f..c7bd5c189e5 100644 --- a/recipe/dapo/run_dapo_early_qwen2.5_32b.sh +++ b/recipe/dapo/run_dapo_early_qwen2.5_32b.sh @@ -58,7 +58,7 @@ gen_tp=4 ray job submit --no-wait --runtime-env="${RUNTIME_ENV}" \ --working-dir "${WORKING_DIR}" \ - -- python3 -m recipe.dapo.src.main_dapo \ + -- python3 -m recipe.dapo.main_dapo \ data.train_files="${TRAIN_FILE}" \ data.val_files="${TEST_FILE}" \ data.prompt_key=prompt \ @@ -125,4 +125,4 @@ ray job submit --no-wait --runtime-env="${RUNTIME_ENV}" \ trainer.save_freq=5 \ trainer.total_epochs=1 \ trainer.default_local_dir="${CKPTS_DIR}" \ - trainer.resume_mode=auto \ No newline at end of file + trainer.resume_mode=auto diff --git a/recipe/dapo/run_dapo_qwen2.5_32b.sh b/recipe/dapo/run_dapo_qwen2.5_32b.sh index 4a85e0f2fcf..6eec26c80e0 100644 --- a/recipe/dapo/run_dapo_qwen2.5_32b.sh +++ b/recipe/dapo/run_dapo_qwen2.5_32b.sh @@ -58,7 +58,7 @@ gen_tp=4 ray job submit --no-wait --runtime-env="${RUNTIME_ENV}" \ --working-dir "${WORKING_DIR}" \ - -- python3 -m recipe.dapo.src.main_dapo \ + -- python3 -m recipe.dapo.main_dapo \ data.train_files="${TRAIN_FILE}" \ data.val_files="${TEST_FILE}" \ data.prompt_key=prompt \ @@ -127,4 +127,4 @@ ray job submit --no-wait --runtime-env="${RUNTIME_ENV}" \ 
trainer.save_freq=5 \ trainer.total_epochs=1 \ trainer.default_local_dir="${CKPTS_DIR}" \ - trainer.resume_mode=auto \ No newline at end of file + trainer.resume_mode=auto diff --git a/recipe/dapo/run_dapo_wo_ds_qwen2.5_32b.sh b/recipe/dapo/run_dapo_wo_ds_qwen2.5_32b.sh index e1699695061..6064b5be6c1 100644 --- a/recipe/dapo/run_dapo_wo_ds_qwen2.5_32b.sh +++ b/recipe/dapo/run_dapo_wo_ds_qwen2.5_32b.sh @@ -56,7 +56,7 @@ gen_tp=4 ray job submit --no-wait --runtime-env="${RUNTIME_ENV}" \ --working-dir "${WORKING_DIR}" \ - -- python3 -m recipe.dapo.src.main_dapo \ + -- python3 -m recipe.dapo.main_dapo \ data.train_files="${TRAIN_FILE}" \ data.val_files="${TEST_FILE}" \ data.prompt_key=prompt \ @@ -122,4 +122,4 @@ ray job submit --no-wait --runtime-env="${RUNTIME_ENV}" \ trainer.save_freq=5 \ trainer.total_epochs=1 \ trainer.default_local_dir="${CKPTS_DIR}" \ - trainer.resume_mode=auto \ No newline at end of file + trainer.resume_mode=auto diff --git a/recipe/dapo/test_dapo_7b.sh b/recipe/dapo/test_dapo_7b.sh index 12a60cbabb2..fe5cb297bc1 100644 --- a/recipe/dapo/test_dapo_7b.sh +++ b/recipe/dapo/test_dapo_7b.sh @@ -55,7 +55,7 @@ offload=False ray job submit --no-wait --runtime-env="${RUNTIME_ENV}" \ --working-dir "${WORKING_DIR}" \ - -- python3 -m recipe.dapo.src.main_dapo \ + -- python3 -m recipe.dapo.main_dapo \ data.train_files="${TRAIN_FILE}" \ data.val_files="${TEST_FILE}" \ data.prompt_key=prompt \ diff --git a/tests/generation/run_gen_qwen05.sh b/tests/e2e/generation/run_gen_qwen05.sh similarity index 100% rename from tests/generation/run_gen_qwen05.sh rename to tests/e2e/generation/run_gen_qwen05.sh diff --git a/tests/e2e/run_dapo.sh b/tests/e2e/run_dapo.sh index a4726140455..c37063a0020 100644 --- a/tests/e2e/run_dapo.sh +++ b/tests/e2e/run_dapo.sh @@ -41,7 +41,7 @@ gen_prompt_bsz=$((train_prompt_bsz * 4)) exp_name="$(basename "${MODEL_ID,,}")-dapo-minimal" -python3 -m recipe.dapo.src.main_dapo \ +python3 -m recipe.dapo.main_dapo \ 
data.train_files="${HOME}/data/gsm8k/train.parquet" \ data.val_files="${HOME}/data/gsm8k/test.parquet" \ reward_model.reward_manager=dapo \ diff --git a/tests/model/test_transformer.py b/tests/models/test_transformer.py similarity index 100% rename from tests/model/test_transformer.py rename to tests/models/test_transformer.py diff --git a/tests/model/test_transformers_ulysses.py b/tests/models/test_transformers_ulysses.py similarity index 100% rename from tests/model/test_transformers_ulysses.py rename to tests/models/test_transformers_ulysses.py diff --git a/tests/verl/test_decorator.py b/tests/single_controller/base/test_decorator.py similarity index 100% rename from tests/verl/test_decorator.py rename to tests/single_controller/base/test_decorator.py diff --git a/tests/verl/test_protocol.py b/tests/test_protocol.py similarity index 100% rename from tests/verl/test_protocol.py rename to tests/test_protocol.py diff --git a/tests/utility/test_timeout_decorator.py b/tests/utility/test_timeout_decorator.py index e9f78a7c6c9..57b563bce69 100644 --- a/tests/utility/test_timeout_decorator.py +++ b/tests/utility/test_timeout_decorator.py @@ -12,17 +12,14 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-import time -import os -import sys import multiprocessing -import queue -import pytest # Import pytest -from functools import wraps +import sys +import threading +import time +import pytest # Import pytest from verl.utils.py_functional import timeout_limit as timeout -import threading # --- Test Task Functions --- TEST_TIMEOUT_SECONDS = 1.5 # Timeout duration for tests diff --git a/tests/checkpoint/test_fsdp_ckpt.py b/tests/utils/checkpoint/test_fsdp_ckpt.py similarity index 100% rename from tests/checkpoint/test_fsdp_ckpt.py rename to tests/utils/checkpoint/test_fsdp_ckpt.py diff --git a/tests/verl/utils/dataset/test_multiturn_sft_dataset.py b/tests/utils/dataset/test_multiturn_sft_dataset.py similarity index 100% rename from tests/verl/utils/dataset/test_multiturn_sft_dataset.py rename to tests/utils/dataset/test_multiturn_sft_dataset.py diff --git a/tests/verl/utils/dataset/test_rl_dataset.py b/tests/utils/dataset/test_rl_dataset.py similarity index 100% rename from tests/verl/utils/dataset/test_rl_dataset.py rename to tests/utils/dataset/test_rl_dataset.py diff --git a/tests/verl/utils/dataset/test_rm_dataset.py b/tests/utils/dataset/test_rm_dataset.py similarity index 100% rename from tests/verl/utils/dataset/test_rm_dataset.py rename to tests/utils/dataset/test_rm_dataset.py diff --git a/tests/verl/utils/dataset/test_sft_dataset.py b/tests/utils/dataset/test_sft_dataset.py similarity index 100% rename from tests/verl/utils/dataset/test_sft_dataset.py rename to tests/utils/dataset/test_sft_dataset.py diff --git a/tests/verl/test_flops_counter.py b/tests/utils/test_flops_counter.py similarity index 99% rename from tests/verl/test_flops_counter.py rename to tests/utils/test_flops_counter.py index c0420989783..c8d3589e9d3 100644 --- a/tests/verl/test_flops_counter.py +++ b/tests/utils/test_flops_counter.py @@ -12,9 +12,10 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-import pytest import math -import json + +import pytest + from verl.utils.flops_counter import FlopsCounter VALID_CONFIG_TYPE = {"llama", "qwen2", "qwen3", "qwen3_moe", "deepseek_v3"} diff --git a/tests/verl/utils/test_import_utils.py b/tests/utils/test_import_utils.py similarity index 100% rename from tests/verl/utils/test_import_utils.py rename to tests/utils/test_import_utils.py diff --git a/tests/verl/utils/test_model.py b/tests/utils/test_model.py similarity index 100% rename from tests/verl/utils/test_model.py rename to tests/utils/test_model.py diff --git a/tests/verl/utils/test_module.py b/tests/utils/test_module.py similarity index 100% rename from tests/verl/utils/test_module.py rename to tests/utils/test_module.py diff --git a/tests/rollout/async_rollout_utils.py b/tests/workers/rollout/async_rollout_utils.py similarity index 100% rename from tests/rollout/async_rollout_utils.py rename to tests/workers/rollout/async_rollout_utils.py diff --git a/tests/rollout/run_fsdp_vllm.py b/tests/workers/rollout/run_fsdp_vllm.py similarity index 100% rename from tests/rollout/run_fsdp_vllm.py rename to tests/workers/rollout/run_fsdp_vllm.py diff --git a/tests/rollout/test_hf_rollout.py b/tests/workers/rollout/test_hf_rollout.py similarity index 100% rename from tests/rollout/test_hf_rollout.py rename to tests/workers/rollout/test_hf_rollout.py diff --git a/tests/rollout/test_sglang_async_rollout_w_tools.py b/tests/workers/rollout/test_sglang_async_rollout_w_tools.py similarity index 100% rename from tests/rollout/test_sglang_async_rollout_w_tools.py rename to tests/workers/rollout/test_sglang_async_rollout_w_tools.py diff --git a/tests/rollout/test_sglang_async_spmd.py b/tests/workers/rollout/test_sglang_async_spmd.py similarity index 100% rename from tests/rollout/test_sglang_async_spmd.py rename to tests/workers/rollout/test_sglang_async_spmd.py diff --git a/tests/rollout/test_sglang_spmd.py b/tests/workers/rollout/test_sglang_spmd.py similarity index 100% rename 
from tests/rollout/test_sglang_spmd.py rename to tests/workers/rollout/test_sglang_spmd.py diff --git a/tests/rollout/test_vllm_hf_loader.py b/tests/workers/rollout/test_vllm_hf_loader.py similarity index 100% rename from tests/rollout/test_vllm_hf_loader.py rename to tests/workers/rollout/test_vllm_hf_loader.py diff --git a/tests/rollout/test_vllm_multi_turn.py b/tests/workers/rollout/test_vllm_multi_turn.py similarity index 98% rename from tests/rollout/test_vllm_multi_turn.py rename to tests/workers/rollout/test_vllm_multi_turn.py index 27077224a13..b705d86a9ca 100644 --- a/tests/rollout/test_vllm_multi_turn.py +++ b/tests/workers/rollout/test_vllm_multi_turn.py @@ -21,7 +21,7 @@ from openai.types.chat.chat_completion import ChatCompletion from vllm.entrypoints.openai.protocol import ChatCompletionRequest, ChatCompletionResponse, ChatCompletionStreamResponse, ErrorResponse -from tests.rollout.async_rollout_utils import init_async_rollout_manager +from tests.workers.rollout.async_rollout_utils import init_async_rollout_manager from verl.protocol import DataProto diff --git a/tests/rollout/test_vllm_spmd.py b/tests/workers/rollout/test_vllm_spmd.py similarity index 100% rename from tests/rollout/test_vllm_spmd.py rename to tests/workers/rollout/test_vllm_spmd.py diff --git a/tests/rollout/test_vllm_tool_calling.py b/tests/workers/rollout/test_vllm_tool_calling.py similarity index 100% rename from tests/rollout/test_vllm_tool_calling.py rename to tests/workers/rollout/test_vllm_tool_calling.py diff --git a/tests/rollout/utils_sglang.py b/tests/workers/rollout/utils_sglang.py similarity index 100% rename from tests/rollout/utils_sglang.py rename to tests/workers/rollout/utils_sglang.py diff --git a/verl/single_controller/base/worker.py b/verl/single_controller/base/worker.py index 8acb4b6a040..7e7a3f2d9ae 100644 --- a/verl/single_controller/base/worker.py +++ b/verl/single_controller/base/worker.py @@ -136,6 +136,7 @@ def _configure_before_init(self, 
register_center_name: str, rank: int): def __init__(self, cuda_visible_devices=None) -> None: # construct a meta from environment variable. Note that the import must be inside the class because it is executed remotely import os + import torch from packaging import version diff --git a/verl/utils/megatron_utils.py b/verl/utils/megatron_utils.py index 60b220b52c0..a16b32ca4d3 100644 --- a/verl/utils/megatron_utils.py +++ b/verl/utils/megatron_utils.py @@ -31,9 +31,9 @@ from megatron.core.utils import get_attr_wrapped_model from transformers import PretrainedConfig -from verl.utils.torch_dtypes import PrecisionType -from verl.utils.model import normalize_model_name import verl.utils.megatron.tensor_parallel as tp_utils +from verl.utils.model import normalize_model_name +from verl.utils.torch_dtypes import PrecisionType def get_model_config(model): diff --git a/verl/utils/reward_score/math_dapo.py b/verl/utils/reward_score/math_dapo.py index d48f68bf4ec..33a699e5641 100644 --- a/verl/utils/reward_score/math_dapo.py +++ b/verl/utils/reward_score/math_dapo.py @@ -14,9 +14,9 @@ # Adapted from https://github.com/EleutherAI/lm-evaluation-harness/blob/main/lm_eval/tasks/hendrycks_math/utils.py import re -import signal from typing import Optional + def last_boxed_only_string(string: str) -> Optional[str]: """Extract the last LaTeX boxed expression from a string. diff --git a/verl/utils/reward_score/prime_math/__init__.py b/verl/utils/reward_score/prime_math/__init__.py index b29a6dc971c..f24c78b917a 100644 --- a/verl/utils/reward_score/prime_math/__init__.py +++ b/verl/utils/reward_score/prime_math/__init__.py @@ -28,9 +28,10 @@ from pylatexenc import latex2text from sympy.parsing import sympy_parser +from verl.utils.py_functional import timeout_limit + from . 
import math_normalize from .grader import math_equal -from verl.utils.py_functional import timeout_limit # import math_normalize # from grader import math_equal diff --git a/verl/utils/reward_score/prime_math/grader.py b/verl/utils/reward_score/prime_math/grader.py index 55048ac8dbe..e2d5fe4862c 100644 --- a/verl/utils/reward_score/prime_math/grader.py +++ b/verl/utils/reward_score/prime_math/grader.py @@ -95,7 +95,6 @@ import contextlib import math import re -import signal from math import isclose from typing import Union diff --git a/verl/workers/megatron_workers.py b/verl/workers/megatron_workers.py index 3cbcacc6c76..2828448cf18 100644 --- a/verl/workers/megatron_workers.py +++ b/verl/workers/megatron_workers.py @@ -262,6 +262,7 @@ def _build_rollout(self, trust_remote_code=False): log_gpu_memory_usage("After building sharding manager", logger=logger) elif self.config.rollout.name == 'sglang': from verl.workers.rollout.sglang_rollout import SGLangRollout + # NOTE(linjunrong): Due to recent fp8 support in SGLang. Now importing any symbol relate to SGLang's model_runner would check CUDA device capability. # However, due to veRL's setting, the main process of ray can not find any CUDA device, which would potentially lead to: # "RuntimeError: No CUDA GPUs are available". diff --git a/verl/workers/sharding_manager/megatron_sglang.py b/verl/workers/sharding_manager/megatron_sglang.py index 5d4167916ff..817867a5a49 100644 --- a/verl/workers/sharding_manager/megatron_sglang.py +++ b/verl/workers/sharding_manager/megatron_sglang.py @@ -15,19 +15,13 @@ This file contains a Megatron style Hybrid Engine that shares the weights of the actor with the inference engine. 
""" -import importlib import logging import os + import torch -import torch.distributed as dist from torch import nn -from verl.utils.model import normalize_model_name -from verl.utils.megatron_utils import broadcast_from_megatron_pp, broadcast_str_from_megatron_pp - -from verl.utils.megatron_utils import get_model, unwrap_model from verl.utils.debug import log_gpu_memory_usage -from verl.utils.megatron_utils import convert_megatron_model_to_transformers_model logger = logging.getLogger(__file__) logger.setLevel(os.getenv('VERL_PPO_LOGGING_LEVEL', 'WARN')) @@ -40,23 +34,14 @@ - After inference, all the parameters that doesn't belong to this pp rank is freed. """ -from .base import BaseShardingManager - -import torch -from torch import nn import torch.distributed +from sglang.srt.entrypoints.verl_engine import VerlEngine from torch.distributed import new_group -from torch.distributed._tensor import DTensor -from typing import Dict, Iterable, Union, Tuple -from verl import DataProto -from verl.protocol import all_gather_data_proto -from verl.utils.torch_functional import (broadcast_dict_tensor, allgather_dict_tensors) -from sglang.srt.entrypoints.verl_engine import VerlEngine from verl.utils.debug import GPUMemoryLogger +from verl.utils.megatron_utils import per_tensor_generator -import verl.utils.megatron.tensor_parallel as tp_utils -from verl.utils.megatron_utils import per_tensor_generator, default_tp_concat_fn +from .base import BaseShardingManager _MICRO_DATA_PARALLEL_GROUP = None