Skip to content
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
61 changes: 58 additions & 3 deletions tests/unittest/_torch/ray_orchestrator/multi_gpu/test_executor.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,14 @@
import os

import pytest
import ray
from ray.util.placement_group import (PlacementGroupSchedulingStrategy,
placement_group, remove_placement_group)
from utils.llm_data import llm_models_root

from tensorrt_llm import LLM
from tensorrt_llm._torch.utils import get_device_uuid
from tensorrt_llm.llmapi import KvCacheConfig


class DummyWorkerExtension:
Expand All @@ -22,17 +26,68 @@ def test_worker_extension():
assert result[0] == "SUCCESS"


@pytest.mark.gpu4
def test_bundle_indices(monkeypatch):
    """Placement via bundle indices.

    Creates a 4-bundle Ray placement group, then launches a TP=2 LLM whose
    workers are pinned to bundles 2 and 3 via TRTLLM_RAY_BUNDLE_INDICES, and
    asserts the workers actually landed on the corresponding GPUs by comparing
    device UUIDs reported by the workers against the locally queried UUIDs.
    """

    # Keep Ray from rewriting CUDA_VISIBLE_DEVICES so bundle->GPU mapping is
    # controlled by TRT-LLM itself; exercise the RPC-based orchestrator path.
    monkeypatch.setenv("RAY_EXPERIMENTAL_NOSET_CUDA_VISIBLE_DEVICES", "1")
    monkeypatch.setenv("TLLM_RAY_USE_RPC", "1")

    pg = None
    try:
        ray.init()
        # One GPU + one CPU per bundle; four bundles total (needs 4 GPUs).
        pg = placement_group([{"GPU": 1, "CPU": 1}] * 4)
        ray.get(pg.ready())
        print(f"Placement group ready with bundles {pg.bundle_specs}")

        # Pin the two TP workers to bundles 2 and 3. Fractional GPU share
        # (0.8) lets the worker actors co-schedule inside those bundles.
        bundle_indices = [2, 3]
        runtime_env = {
            "env_vars": {
                "TRTLLM_RAY_PER_WORKER_GPUS": "0.8",
                "TRTLLM_RAY_BUNDLE_INDICES": ",".join(map(str, bundle_indices))
            }
        }

        # Wrap LLM as a zero-resource Ray actor inside the placement group;
        # capture_child_tasks=True makes the spawned workers inherit the PG.
        llm = ray.remote(
            num_cpus=0,
            num_gpus=0,
            runtime_env=runtime_env,
            scheduling_strategy=PlacementGroupSchedulingStrategy(
                placement_group=pg,
                placement_group_capture_child_tasks=True,
            ),
        )(LLM).remote(
            model=os.path.join(llm_models_root(), "llama-models-v2",
                               "TinyLlama-1.1B-Chat-v1.0"),
            kv_cache_config=KvCacheConfig(free_gpu_memory_fraction=0.1),
            tensor_parallel_size=2,
            orchestrator_type="ray",
        )

        # Ask every worker for the UUID of the device it is bound to.
        inference_actor_uuids = ray.get(
            llm._collective_rpc.remote("report_device_id"))

        # NOTE(review): assumes bundle index i maps to local GPU index i —
        # holds on a single node with NOSET_CUDA_VISIBLE_DEVICES; verify if
        # this test is ever run multi-node.
        expected_uuids = [get_device_uuid(idx) for idx in bundle_indices]

        # Order-insensitive: worker rank -> bundle assignment may vary.
        assert sorted(inference_actor_uuids) == sorted(expected_uuids), \
            f"Workers not placed on expected GPUs. Expected UUIDs: {expected_uuids}, Got: {inference_actor_uuids}"

    finally:
        # Always release cluster resources, even on assertion failure.
        if pg is not None:
            remove_placement_group(pg)
        ray.shutdown()


@pytest.mark.gpu2
def test_cuda_visible_device(monkeypatch):
    """Placement via CUDA_VISIBLE_DEVICES.

    Restricts the visible devices to physical GPU 1 and checks that the single
    Ray worker reports that GPU's UUID. Uses pytest's ``monkeypatch`` fixture
    so the environment variable is restored automatically after the test,
    instead of mutating ``os.environ`` directly and deleting it by hand.
    """
    # monkeypatch.setenv replaces the raw os.environ assignment/del pair:
    # it undoes itself even if the assertion below fails.
    monkeypatch.setenv("CUDA_VISIBLE_DEVICES", "1")

    llm = LLM(model=llm_models_root() /
              "llama-models-v2/TinyLlama-1.1B-Chat-v1.0",
              orchestrator_type="ray")

    # Collective RPC returns one UUID per worker; single worker here.
    infer_actor_uuids = llm._collective_rpc("report_device_id")

    # Only device index 1 is visible, so the worker must sit on GPU 1.
    assert infer_actor_uuids[0] == get_device_uuid(1)
    print(f"{infer_actor_uuids=}")
Loading