diff --git a/.buildkite/test-amd.yaml b/.buildkite/test-amd.yaml
index f6354758b40..ed1b6e72cc0 100644
--- a/.buildkite/test-amd.yaml
+++ b/.buildkite/test-amd.yaml
@@ -101,6 +101,7 @@ steps:
     - export VLLM_LOGGING_LEVEL=DEBUG
     - export VLLM_WORKER_MULTIPROC_METHOD=spawn
     - export VLLM_TEST_CLEAN_GPU_MEMORY="1"
+    - export VLLM_ROCM_USE_AITER=0
     - pytest -s -v tests/e2e/offline_inference/test_qwen3_omni.py
     - pytest -s -v tests/e2e/online_serving/test_qwen3_omni.py
 
diff --git a/vllm_omni/engine/async_omni_engine.py b/vllm_omni/engine/async_omni_engine.py
index 4405cb2eb4e..4a20e4bc48a 100644
--- a/vllm_omni/engine/async_omni_engine.py
+++ b/vllm_omni/engine/async_omni_engine.py
@@ -350,9 +350,9 @@ def _launch_llm_stage(
                     )
                 finally:
                     if previous_visible_devices is None:
-                        os.environ.pop(device_control_env, None)
+                        current_omni_platform.unset_device_control_env_var()
                     else:
-                        os.environ[device_control_env] = previous_visible_devices
+                        current_omni_platform.set_device_control_env_var(previous_visible_devices)
 
             logger.info("[AsyncOmniEngine] Stage %s engine launch started", metadata.stage_id)
             launch_cm.__exit__(None, None, None)
diff --git a/vllm_omni/entrypoints/stage_utils.py b/vllm_omni/entrypoints/stage_utils.py
index 317d54322f3..1f45ff9043e 100644
--- a/vllm_omni/entrypoints/stage_utils.py
+++ b/vllm_omni/entrypoints/stage_utils.py
@@ -72,7 +72,7 @@ def set_stage_devices(
                 else:
                     mapped_devices.append(str(idx))
             mapped_devices_str = ",".join(mapped_devices)
-            os.environ[env_var] = mapped_devices_str
+            current_omni_platform.set_device_control_env_var(mapped_devices_str)
             if toks:
                 try:
                     selected_physical = int(mapped_devices[0])
@@ -99,7 +99,7 @@ def set_stage_devices(
                     selected_physical = None
             if selected_physical is None:
                 selected_physical = int(logical_idx)
-            os.environ[env_var] = str(selected_physical)
+            current_omni_platform.set_device_control_env_var(str(selected_physical))
             logger.debug(
                 "[Stage-%s] Logical index %d -> physical %s; set %s to single device",
                 stage_id,
@@ -111,7 +111,7 @@ def set_stage_devices(
             logger.debug("[Stage-%s] Using default device visibility (devices=%s)", stage_id, devices)
         else:
             selected_physical = int(str(devices))
-            os.environ[env_var] = str(selected_physical)
+            current_omni_platform.set_device_control_env_var(str(selected_physical))
             logger.debug("[Stage-%s] Set %s to single device %s (fallback)", stage_id, env_var, selected_physical)
     except Exception as e:
         logger.warning("Failed to interpret devices for stage %s: %s", stage_id, e)
diff --git a/vllm_omni/platforms/interface.py b/vllm_omni/platforms/interface.py
index 314cb3219e5..d86e49e0845 100644
--- a/vllm_omni/platforms/interface.py
+++ b/vllm_omni/platforms/interface.py
@@ -113,6 +113,30 @@ def get_free_memory(cls, device: torch.device | None = None) -> int:
     def supports_cpu_offload(cls) -> bool:
         return True
 
+    @classmethod
+    def set_device_control_env_var(cls, devices: str | int | None) -> None:
+        """Export ``devices`` through ``cls.device_control_env_var``.
+
+        Args:
+            devices: Value to store in the environment variable. Non-string
+                values are converted with ``str()`` because ``os.environ``
+                only accepts strings. ``None`` removes the variable instead.
+        """
+        import os
+
+        if devices is None:
+            # os.environ cannot hold None; treat it as a request to clear.
+            cls.unset_device_control_env_var()
+            return
+        os.environ[cls.device_control_env_var] = str(devices)
+
+    @classmethod
+    def unset_device_control_env_var(cls) -> None:
+        """Remove ``cls.device_control_env_var`` from the environment if set."""
+        import os
+
+        os.environ.pop(cls.device_control_env_var, None)
+
 
 class UnspecifiedOmniPlatform(OmniPlatform):
     _omni_enum = OmniPlatformEnum.UNSPECIFIED
diff --git a/vllm_omni/platforms/rocm/platform.py b/vllm_omni/platforms/rocm/platform.py
index 14534f3a13c..4479e54f2a2 100644
--- a/vllm_omni/platforms/rocm/platform.py
+++ b/vllm_omni/platforms/rocm/platform.py
@@ -99,3 +99,30 @@ def synchronize(cls) -> None:
     def get_free_memory(cls, device: torch.device | None = None) -> int:
         free, _ = torch.cuda.mem_get_info(device)
         return free
+
+    @classmethod
+    def set_device_control_env_var(cls, devices: str | int | None) -> None:
+        """Set both ``HIP_VISIBLE_DEVICES`` and ``CUDA_VISIBLE_DEVICES``.
+
+        Args:
+            devices: Value to store in both variables. Non-string values are
+                converted with ``str()`` because ``os.environ`` only accepts
+                strings. ``None`` removes both variables instead.
+        """
+        import os
+
+        if devices is None:
+            # os.environ cannot hold None; treat it as a request to clear.
+            cls.unset_device_control_env_var()
+            return
+        value = str(devices)
+        os.environ["HIP_VISIBLE_DEVICES"] = value
+        os.environ["CUDA_VISIBLE_DEVICES"] = value
+
+    @classmethod
+    def unset_device_control_env_var(cls) -> None:
+        """Remove ``HIP_VISIBLE_DEVICES`` and ``CUDA_VISIBLE_DEVICES`` if set."""
+        import os
+
+        os.environ.pop("HIP_VISIBLE_DEVICES", None)
+        os.environ.pop("CUDA_VISIBLE_DEVICES", None)