NOTE(review): this patch was rebuilt from a whitespace-collapsed copy.
  * Leading indentation of context lines and the position of blank context
    lines are reconstructed, not verified against the tree; apply with
    `git apply --ignore-whitespace --recount` and confirm the result.
  * The `index` blob ids of the two platform files predate the
    str()/None handling added below and are therefore stale.
  * Assumes `current_omni_platform` is already imported in
    async_omni_engine.py and stage_utils.py -- not shown here, confirm.

diff --git a/.buildkite/test-amd.yaml b/.buildkite/test-amd.yaml
index f6354758b40..ed1b6e72cc0 100644
--- a/.buildkite/test-amd.yaml
+++ b/.buildkite/test-amd.yaml
@@ -101,6 +101,7 @@ steps:
       - export VLLM_LOGGING_LEVEL=DEBUG
       - export VLLM_WORKER_MULTIPROC_METHOD=spawn
       - export VLLM_TEST_CLEAN_GPU_MEMORY="1"
+      - export VLLM_ROCM_USE_AITER=0
       - pytest -s -v tests/e2e/offline_inference/test_qwen3_omni.py
       - pytest -s -v tests/e2e/online_serving/test_qwen3_omni.py
 
diff --git a/vllm_omni/engine/async_omni_engine.py b/vllm_omni/engine/async_omni_engine.py
index 4405cb2eb4e..4a20e4bc48a 100644
--- a/vllm_omni/engine/async_omni_engine.py
+++ b/vllm_omni/engine/async_omni_engine.py
@@ -350,9 +350,9 @@ def _launch_llm_stage(
                 )
             finally:
                 if previous_visible_devices is None:
-                    os.environ.pop(device_control_env, None)
+                    current_omni_platform.unset_device_control_env_var()
                 else:
-                    os.environ[device_control_env] = previous_visible_devices
+                    current_omni_platform.set_device_control_env_var(previous_visible_devices)
 
             logger.info("[AsyncOmniEngine] Stage %s engine launch started", metadata.stage_id)
             launch_cm.__exit__(None, None, None)
diff --git a/vllm_omni/entrypoints/stage_utils.py b/vllm_omni/entrypoints/stage_utils.py
index 317d54322f3..1f45ff9043e 100644
--- a/vllm_omni/entrypoints/stage_utils.py
+++ b/vllm_omni/entrypoints/stage_utils.py
@@ -72,7 +72,7 @@ def set_stage_devices(
                 else:
                     mapped_devices.append(str(idx))
             mapped_devices_str = ",".join(mapped_devices)
-            os.environ[env_var] = mapped_devices_str
+            current_omni_platform.set_device_control_env_var(mapped_devices_str)
             if toks:
                 try:
                     selected_physical = int(mapped_devices[0])
@@ -99,7 +99,7 @@ def set_stage_devices(
                 selected_physical = None
             if selected_physical is None:
                 selected_physical = int(logical_idx)
-            os.environ[env_var] = str(selected_physical)
+            current_omni_platform.set_device_control_env_var(str(selected_physical))
             logger.debug(
                 "[Stage-%s] Logical index %d -> physical %s; set %s to single device",
                 stage_id,
@@ -111,7 +111,7 @@ def set_stage_devices(
             logger.debug("[Stage-%s] Using default device visibility (devices=%s)", stage_id, devices)
         else:
             selected_physical = int(str(devices))
-            os.environ[env_var] = str(selected_physical)
+            current_omni_platform.set_device_control_env_var(str(selected_physical))
             logger.debug("[Stage-%s] Set %s to single device %s (fallback)", stage_id, env_var, selected_physical)
     except Exception as e:
         logger.warning("Failed to interpret devices for stage %s: %s", stage_id, e)
diff --git a/vllm_omni/platforms/interface.py b/vllm_omni/platforms/interface.py
index 314cb3219e5..d86e49e0845 100644
--- a/vllm_omni/platforms/interface.py
+++ b/vllm_omni/platforms/interface.py
@@ -113,6 +113,27 @@ def get_free_memory(cls, device: torch.device | None = None) -> int:
     def supports_cpu_offload(cls) -> bool:
         return True
 
+    @classmethod
+    def set_device_control_env_var(cls, devices: str | int | None) -> None:
+        """Write ``devices`` to the variable named by ``cls.device_control_env_var``.
+
+        ``os.environ`` only accepts ``str`` values, so integer device ids are
+        stringified; ``None`` clears the variable instead of raising TypeError.
+        """
+        import os
+
+        if devices is None:
+            cls.unset_device_control_env_var()
+            return
+        os.environ[cls.device_control_env_var] = str(devices)
+
+    @classmethod
+    def unset_device_control_env_var(cls) -> None:
+        """Remove the variable named by ``cls.device_control_env_var`` if set."""
+        import os
+
+        os.environ.pop(cls.device_control_env_var, None)
+
 
 class UnspecifiedOmniPlatform(OmniPlatform):
     _omni_enum = OmniPlatformEnum.UNSPECIFIED
diff --git a/vllm_omni/platforms/rocm/platform.py b/vllm_omni/platforms/rocm/platform.py
index 14534f3a13c..4479e54f2a2 100644
--- a/vllm_omni/platforms/rocm/platform.py
+++ b/vllm_omni/platforms/rocm/platform.py
@@ -99,3 +99,27 @@ def synchronize(cls) -> None:
     def get_free_memory(cls, device: torch.device | None = None) -> int:
         free, _ = torch.cuda.mem_get_info(device)
         return free
+
+    @classmethod
+    def set_device_control_env_var(cls, devices: str | int | None) -> None:
+        """Write ``devices`` to both HIP_VISIBLE_DEVICES and CUDA_VISIBLE_DEVICES.
+
+        ``os.environ`` only accepts ``str`` values, so integer device ids are
+        stringified; ``None`` clears both variables instead of raising TypeError.
+        """
+        import os
+
+        if devices is None:
+            cls.unset_device_control_env_var()
+            return
+        value = str(devices)
+        os.environ["HIP_VISIBLE_DEVICES"] = value
+        os.environ["CUDA_VISIBLE_DEVICES"] = value
+
+    @classmethod
+    def unset_device_control_env_var(cls) -> None:
+        """Remove HIP_VISIBLE_DEVICES and CUDA_VISIBLE_DEVICES if they are set."""
+        import os
+
+        os.environ.pop("HIP_VISIBLE_DEVICES", None)
+        os.environ.pop("CUDA_VISIBLE_DEVICES", None)