diff --git a/examples/configs/grpo_math_1B.yaml b/examples/configs/grpo_math_1B.yaml
index 283a3d9c31..85cc620b62 100644
--- a/examples/configs/grpo_math_1B.yaml
+++ b/examples/configs/grpo_math_1B.yaml
@@ -101,7 +101,7 @@ policy:
     stop_token_ids: null
     stop_strings: null
     vllm_cfg:
-      async_engine: false # Only for internal testing, will be enabled by https://github.com/NVIDIA/NeMo-RL/issues/447.
+      async_engine: false
       precision: ${policy.precision}
       tensor_parallel_size: 1
       pipeline_parallel_size: 1
diff --git a/nemo_rl/models/generation/vllm.py b/nemo_rl/models/generation/vllm.py
index 3bf64b2652..0b0bb00ad6 100644
--- a/nemo_rl/models/generation/vllm.py
+++ b/nemo_rl/models/generation/vllm.py
@@ -1100,6 +1100,15 @@ def __init__(
         """Initialize a vLLM policy with distributed workers."""
         # Store config
         self.cfg = config
+        if self.cfg["vllm_cfg"]["pipeline_parallel_size"] > 1:
+            # Raise explicitly instead of using `assert`, which is stripped
+            # under `python -O` and would let this misconfiguration through.
+            if not self.cfg["vllm_cfg"]["async_engine"]:
+                raise AssertionError(
+                    "When pipeline_parallel_size > 1, async_engine must be set to True in the vLLM configuration. "
+                    "You can enable it by adding `policy.generation.vllm_cfg.async_engine=true` to your command."
+                )
+
         # Ensure all required VllmConfig fields are present
         missing_keys = [
             key for key in VllmConfig.__required_keys__ if key not in self.cfg