NVIDIA-NeMo · terrykong · Jun 26, 2025 · Jun 26, 2025
@@ -101,7 +101,7 @@ policy:
     stop_token_ids: null
     stop_strings: null
     vllm_cfg:
-      async_engine: false # Only for internal testing, will be enabled by https://github.com/NVIDIA/NeMo-RL/issues/447.
+      async_engine: false
       precision: ${policy.precision}
       tensor_parallel_size: 1
       pipeline_parallel_size: 1

@@ -1100,6 +1100,12 @@ def __init__(
         """Initialize a vLLM policy with distributed workers."""
         # Store config
         self.cfg = config
+        if self.cfg["vllm_cfg"]["pipeline_parallel_size"] > 1:
+            assert self.cfg["vllm_cfg"]["async_engine"], (
+                "When pipeline_parallel_size > 1, async_engine must be set to True in the vLLM configuration. "
+                "You can enable it by adding `policy.generation.vllm_cfg.async_engine=true` to your command."
+            )
+
         # Ensure all required VllmConfig fields are present
         missing_keys = [
             key for key in VllmConfig.__required_keys__ if key not in self.cfg